From: Keith Randall
Date: Thu, 30 Jan 2020 18:17:01 +0000 (-0800)
Subject: cmd/compile: insert complicated x86 addressing modes as a separate pass
X-Git-Tag: go1.15beta1~914
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=98cb76799c3053e779c4e1b61bb50705d25dd77f;p=gostls13.git

cmd/compile: insert complicated x86 addressing modes as a separate pass

Use a separate compiler pass to introduce complicated x86 addressing
modes. Loads in the normal architecture rules (for x86 and all other
platforms) can have constant offsets (AuxInt values) and symbols (Aux
values), but no more.

The complex addressing modes (x+y, x+2*y, etc.) are introduced in a
separate pass that combines loads with LEAQx ops.

Organizing rewrites this way reduces the number of rewrite rules
required: otherwise, many different rule orderings have to be
specified to ensure these complex addressing modes are found whenever
they are possible.

Update #36468

Change-Id: I5b4bf7b03a1e731d6dfeb9ef19b376175f3b4b44
Reviewed-on: https://go-review.googlesource.com/c/go/+/217097
Run-TryBot: Keith Randall
TryBot-Result: Gobot Gobot
Reviewed-by: Josh Bleecher Snyder
---

diff --git a/src/cmd/compile/internal/ssa/addressingmodes.go b/src/cmd/compile/internal/ssa/addressingmodes.go
new file mode 100644
index 0000000000..8874b56a9b
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/addressingmodes.go
@@ -0,0 +1,154 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// addressingModes combines address calculations into memory operations
+// that can perform complicated addressing modes.
+func addressingModes(f *Func) {
+	switch f.Config.arch {
+	default:
+		// Most architectures can't do this.
+		return
+	case "amd64":
+		// TODO: 386, s390x?
+	}
+
+	var tmp []*Value
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			if !combineFirst[v.Op] {
+				continue
+			}
+			p := v.Args[0]
+			c, ok := combine[[2]Op{v.Op, p.Op}]
+			if !ok {
+				continue
+			}
+			// See if we can combine the Aux/AuxInt values.
+			switch [2]auxType{opcodeTable[v.Op].auxType, opcodeTable[p.Op].auxType} {
+			case [2]auxType{auxSymOff, auxInt32}:
+				// TODO: introduce auxSymOff32
+				if !is32Bit(v.AuxInt + p.AuxInt) {
+					continue
+				}
+				v.AuxInt += p.AuxInt
+			case [2]auxType{auxSymOff, auxSymOff}:
+				if v.Aux != nil && p.Aux != nil {
+					continue
+				}
+				if !is32Bit(v.AuxInt + p.AuxInt) {
+					continue
+				}
+				if p.Aux != nil {
+					v.Aux = p.Aux
+				}
+				v.AuxInt += p.AuxInt
+			case [2]auxType{auxSymValAndOff, auxInt32}:
+				vo := ValAndOff(v.AuxInt)
+				if !vo.canAdd(p.AuxInt) {
+					continue
+				}
+				v.AuxInt = vo.add(p.AuxInt)
+			case [2]auxType{auxSymValAndOff, auxSymOff}:
+				vo := ValAndOff(v.AuxInt)
+				if v.Aux != nil && p.Aux != nil {
+					continue
+				}
+				if !vo.canAdd(p.AuxInt) {
+					continue
+				}
+				if p.Aux != nil {
+					v.Aux = p.Aux
+				}
+				v.AuxInt = vo.add(p.AuxInt)
+			case [2]auxType{auxSymOff, auxNone}:
+				// nothing to do
+			case [2]auxType{auxSymValAndOff, auxNone}:
+				// nothing to do
+			default:
+				f.Fatalf("unknown aux combining for %s and %s\n", v.Op, p.Op)
+			}
+			// Combine the operations.
+			tmp = append(tmp[:0], v.Args[1:]...)
+			v.resetArgs()
+			v.Op = c
+			v.AddArgs(p.Args...)
+			v.AddArgs(tmp...)
+		}
+	}
+}
+
+// combineFirst contains ops which appear in combine as the
+// first part of the key.
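+//
+// For example (an illustrative sketch; values chosen arbitrarily): given
+//	v = (MOVQload [8] {s} p mem), p = (LEAQ8 [16] ptr idx)
+// combineFirst[OpAMD64MOVQload] is true and the combine table below maps
+// {MOVQload, LEAQ8} to MOVQloadidx8, so the two auxSymOff offsets are
+// summed and v is rewritten in place to
+//	(MOVQloadidx8 [24] {s} ptr idx mem)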
+var combineFirst = map[Op]bool{} + +func init() { + for k := range combine { + combineFirst[k[0]] = true + } +} + +// For each entry k, v in this map, if we have a value x with: +// x.Op == k[0] +// x.Args[0].Op == k[1] +// then we can set x.Op to v and set x.Args like this: +// x.Args[0].Args + x.Args[1:] +// Additionally, the Aux/AuxInt from x.Args[0] is merged into x. +var combine = map[[2]Op]Op{ + [2]Op{OpAMD64MOVBload, OpAMD64ADDQ}: OpAMD64MOVBloadidx1, + [2]Op{OpAMD64MOVWload, OpAMD64ADDQ}: OpAMD64MOVWloadidx1, + [2]Op{OpAMD64MOVLload, OpAMD64ADDQ}: OpAMD64MOVLloadidx1, + [2]Op{OpAMD64MOVQload, OpAMD64ADDQ}: OpAMD64MOVQloadidx1, + [2]Op{OpAMD64MOVSSload, OpAMD64ADDQ}: OpAMD64MOVSSloadidx1, + [2]Op{OpAMD64MOVSDload, OpAMD64ADDQ}: OpAMD64MOVSDloadidx1, + + [2]Op{OpAMD64MOVBstore, OpAMD64ADDQ}: OpAMD64MOVBstoreidx1, + [2]Op{OpAMD64MOVWstore, OpAMD64ADDQ}: OpAMD64MOVWstoreidx1, + [2]Op{OpAMD64MOVLstore, OpAMD64ADDQ}: OpAMD64MOVLstoreidx1, + [2]Op{OpAMD64MOVQstore, OpAMD64ADDQ}: OpAMD64MOVQstoreidx1, + [2]Op{OpAMD64MOVSSstore, OpAMD64ADDQ}: OpAMD64MOVSSstoreidx1, + [2]Op{OpAMD64MOVSDstore, OpAMD64ADDQ}: OpAMD64MOVSDstoreidx1, + + [2]Op{OpAMD64MOVBstoreconst, OpAMD64ADDQ}: OpAMD64MOVBstoreconstidx1, + [2]Op{OpAMD64MOVWstoreconst, OpAMD64ADDQ}: OpAMD64MOVWstoreconstidx1, + [2]Op{OpAMD64MOVLstoreconst, OpAMD64ADDQ}: OpAMD64MOVLstoreconstidx1, + [2]Op{OpAMD64MOVQstoreconst, OpAMD64ADDQ}: OpAMD64MOVQstoreconstidx1, + + [2]Op{OpAMD64MOVBload, OpAMD64LEAQ1}: OpAMD64MOVBloadidx1, + [2]Op{OpAMD64MOVWload, OpAMD64LEAQ1}: OpAMD64MOVWloadidx1, + [2]Op{OpAMD64MOVWload, OpAMD64LEAQ2}: OpAMD64MOVWloadidx2, + [2]Op{OpAMD64MOVLload, OpAMD64LEAQ1}: OpAMD64MOVLloadidx1, + [2]Op{OpAMD64MOVLload, OpAMD64LEAQ4}: OpAMD64MOVLloadidx4, + [2]Op{OpAMD64MOVLload, OpAMD64LEAQ8}: OpAMD64MOVLloadidx8, + [2]Op{OpAMD64MOVQload, OpAMD64LEAQ1}: OpAMD64MOVQloadidx1, + [2]Op{OpAMD64MOVQload, OpAMD64LEAQ8}: OpAMD64MOVQloadidx8, + [2]Op{OpAMD64MOVSSload, OpAMD64LEAQ1}: OpAMD64MOVSSloadidx1, + [2]Op{OpAMD64MOVSSload, OpAMD64LEAQ4}: OpAMD64MOVSSloadidx4, + [2]Op{OpAMD64MOVSDload, OpAMD64LEAQ1}: OpAMD64MOVSDloadidx1, + [2]Op{OpAMD64MOVSDload, OpAMD64LEAQ8}: OpAMD64MOVSDloadidx8, + + [2]Op{OpAMD64MOVBstore, OpAMD64LEAQ1}: OpAMD64MOVBstoreidx1, + [2]Op{OpAMD64MOVWstore, OpAMD64LEAQ1}: OpAMD64MOVWstoreidx1, + [2]Op{OpAMD64MOVWstore, OpAMD64LEAQ2}: OpAMD64MOVWstoreidx2, + [2]Op{OpAMD64MOVLstore, OpAMD64LEAQ1}: OpAMD64MOVLstoreidx1, + [2]Op{OpAMD64MOVLstore, OpAMD64LEAQ4}: OpAMD64MOVLstoreidx4, + [2]Op{OpAMD64MOVLstore, OpAMD64LEAQ8}: OpAMD64MOVLstoreidx8, + [2]Op{OpAMD64MOVQstore, OpAMD64LEAQ1}: OpAMD64MOVQstoreidx1, + [2]Op{OpAMD64MOVQstore, OpAMD64LEAQ8}: OpAMD64MOVQstoreidx8, + [2]Op{OpAMD64MOVSSstore, OpAMD64LEAQ1}: OpAMD64MOVSSstoreidx1, + [2]Op{OpAMD64MOVSSstore, OpAMD64LEAQ4}: OpAMD64MOVSSstoreidx4, + [2]Op{OpAMD64MOVSDstore, OpAMD64LEAQ1}: OpAMD64MOVSDstoreidx1, + [2]Op{OpAMD64MOVSDstore, OpAMD64LEAQ8}: OpAMD64MOVSDstoreidx8, + + [2]Op{OpAMD64MOVBstoreconst, OpAMD64LEAQ1}: OpAMD64MOVBstoreconstidx1, + [2]Op{OpAMD64MOVWstoreconst, OpAMD64LEAQ1}: OpAMD64MOVWstoreconstidx1, + [2]Op{OpAMD64MOVWstoreconst, OpAMD64LEAQ2}: OpAMD64MOVWstoreconstidx2, + [2]Op{OpAMD64MOVLstoreconst, OpAMD64LEAQ1}: OpAMD64MOVLstoreconstidx1, + [2]Op{OpAMD64MOVLstoreconst, OpAMD64LEAQ4}: OpAMD64MOVLstoreconstidx4, + [2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ1}: OpAMD64MOVQstoreconstidx1, + [2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ8}: OpAMD64MOVQstoreconstidx8, + + // TODO: 386 +} diff --git a/src/cmd/compile/internal/ssa/compile.go 
b/src/cmd/compile/internal/ssa/compile.go index 2de4e133bf..9ec4252def 100644 --- a/src/cmd/compile/internal/ssa/compile.go +++ b/src/cmd/compile/internal/ssa/compile.go @@ -442,6 +442,7 @@ var passes = [...]pass{ {name: "insert resched checks", fn: insertLoopReschedChecks, disabled: objabi.Preemptibleloops_enabled == 0}, // insert resched checks in loops. {name: "lower", fn: lower, required: true}, + {name: "addressing modes", fn: addressingModes, required: false}, {name: "lowered deadcode for cse", fn: deadcode}, // deadcode immediately before CSE avoids CSE making dead values live again {name: "lowered cse", fn: cse}, {name: "elim unread autos", fn: elimUnreadAutos}, diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index c165fed485..1d24d780c6 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -1043,12 +1043,6 @@ (MOVWQZX x) && zeroUpper48Bits(x,3) -> x (MOVBQZX x) && zeroUpper56Bits(x,3) -> x -(MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVBloadidx1 [off] {sym} ptr idx mem) -(MOVWQZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx1 [off] {sym} ptr idx mem) -(MOVWQZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWloadidx2 [off] {sym} ptr idx mem) -(MOVLQZX x:(MOVLloadidx1 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLloadidx1 [off] {sym} ptr idx mem) -(MOVLQZX x:(MOVLloadidx4 [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVLloadidx4 [off] {sym} ptr idx mem) - // replace load from same location as preceding store with zero/sign extension (or copy in case of full width) (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBQZX x) (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWQZX x) @@ -1166,86 +1160,6 @@ && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) -// generating indexed loads and stores -(MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - (MOV(B|W|L|Q|SS|SD)loadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) -(MOVWload [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) -(MOV(L|SS)load [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - (MOV(L|SS)loadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) -(MOV(L|Q|SD)load [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - (MOV(L|Q|SD)loadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) - -(MOV(B|W|L|Q|SS|SD)store [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - (MOV(B|W|L|Q|SS|SD)storeidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) -(MOVWstore [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) -(MOV(L|SS)store [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - 
(MOV(L|SS)storeidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) -(MOV(L|Q|SD)store [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> - (MOV(L|Q|SD)storeidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) - -(MOV(B|W|L|Q|SS|SD)load [off] {sym} (ADDQ ptr idx) mem) && ptr.Op != OpSB -> - (MOV(B|W|L|Q|SS|SD)loadidx1 [off] {sym} ptr idx mem) -(MOV(B|W|L|Q|SS|SD)store [off] {sym} (ADDQ ptr idx) val mem) && ptr.Op != OpSB -> - (MOV(B|W|L|Q|SS|SD)storeidx1 [off] {sym} ptr idx val mem) - -(MOV(B|W|L|Q)storeconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) -> - (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) -(MOVWstoreconst [x] {sym1} (LEAQ2 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) -> - (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) -(MOVLstoreconst [x] {sym1} (LEAQ4 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) -> - (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) -(MOVQstoreconst [x] {sym1} (LEAQ8 [off] {sym2} ptr idx) mem) && canMergeSym(sym1, sym2) -> - (MOVQstoreconstidx8 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) - -(MOV(B|W|L|Q)storeconst [x] {sym} (ADDQ ptr idx) mem) -> (MOV(B|W|L|Q)storeconstidx1 [x] {sym} ptr idx mem) - -// combine SHLQ into indexed loads and stores -(MOVWloadidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) -> (MOVWloadidx2 [c] {sym} ptr idx mem) -(MOV(L|SS)loadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) -> (MOV(L|SS)loadidx4 [c] {sym} ptr idx mem) -(MOV(L|Q|SD)loadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) -> (MOV(L|Q|SD)loadidx8 [c] {sym} ptr idx mem) - -(MOVWstoreidx1 [c] {sym} ptr (SHLQconst [1] idx) val mem) -> (MOVWstoreidx2 [c] {sym} ptr idx val mem) -(MOV(L|SS)storeidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem) -> (MOV(L|SS)storeidx4 [c] {sym} ptr idx val mem) -(MOV(L|Q|SD)storeidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem) -> (MOV(L|Q|SD)storeidx8 [c] {sym} ptr idx val mem) -(MOVWstoreconstidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) -> (MOVWstoreconstidx2 [c] {sym} ptr idx mem) -(MOVLstoreconstidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) -> (MOVLstoreconstidx4 [c] {sym} ptr idx mem) -(MOVQstoreconstidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) -> (MOVQstoreconstidx8 [c] {sym} ptr idx mem) - -// combine ADDQ into pointer of indexed loads and stores -(MOV(B|W|L|Q|SS|SD)loadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)loadidx1 [c+d] {sym} ptr idx mem) -(MOVWloadidx2 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOVWloadidx2 [c+d] {sym} ptr idx mem) -(MOV(L|SS)loadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(L|SS)loadidx4 [c+d] {sym} ptr idx mem) -(MOV(L|Q|SD)loadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem) && is32Bit(c+d) -> (MOV(L|Q|SD)loadidx8 [c+d] {sym} ptr idx mem) - -(MOV(B|W|L|Q|SS|SD)storeidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)storeidx1 [c+d] {sym} ptr idx val mem) -(MOVWstoreidx2 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOVWstoreidx2 [c+d] {sym} ptr idx val mem) -(MOV(L|SS)storeidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(L|SS)storeidx4 [c+d] {sym} ptr idx val mem) -(MOV(L|Q|SD)storeidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem) && is32Bit(c+d) -> (MOV(L|Q|SD)storeidx8 [c+d] {sym} ptr idx val mem) - - -// combine ADDQ into index of indexed loads and stores 
-(MOV(B|W|L|Q|SS|SD)loadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)loadidx1 [c+d] {sym} ptr idx mem) -(MOVWloadidx2 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+2*d) -> (MOVWloadidx2 [c+2*d] {sym} ptr idx mem) -(MOV(L|SS)loadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+4*d) -> (MOV(L|SS)loadidx4 [c+4*d] {sym} ptr idx mem) -(MOV(L|Q|SD)loadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem) && is32Bit(c+8*d) -> (MOV(L|Q|SD)loadidx8 [c+8*d] {sym} ptr idx mem) - -(MOV(B|W|L|Q|SS|SD)storeidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+d) -> (MOV(B|W|L|Q|SS|SD)storeidx1 [c+d] {sym} ptr idx val mem) -(MOVWstoreidx2 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+2*d) -> (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem) -(MOV(L|SS)storeidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+4*d) -> (MOV(L|SS)storeidx4 [c+4*d] {sym} ptr idx val mem) -(MOV(L|Q|SD)storeidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem) && is32Bit(c+8*d) -> (MOV(L|Q|SD)storeidx8 [c+8*d] {sym} ptr idx val mem) - -(MOV(B|W|L|Q)storeconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) -(MOVWstoreconstidx2 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem) -(MOVLstoreconstidx4 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem) -(MOVQstoreconstidx8 [x] {sym} (ADDQconst [c] ptr) idx mem) && ValAndOff(x).canAdd(c) -> (MOVQstoreconstidx8 [ValAndOff(x).add(c)] {sym} ptr idx mem) - -(MOV(B|W|L|Q)storeconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(c) -> (MOV(B|W|L|Q)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) -(MOVWstoreconstidx2 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(2*c) -> (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem) -(MOVLstoreconstidx4 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(4*c) -> (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem) -(MOVQstoreconstidx8 [x] {sym} ptr (ADDQconst [c] idx) mem) && ValAndOff(x).canAdd(8*c) -> (MOVQstoreconstidx8 [ValAndOff(x).add(8*c)] {sym} ptr idx mem) - // fold LEAQs together (LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> (LEAQ [off1+off2] {mergeSym(sym1,sym2)} x) @@ -1274,6 +1188,17 @@ (LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y) +// LEAQ[1248] into LEAQ[1248]. Only some such merges are possible. +(LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> + (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} x y) +(LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} x y)) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) -> + (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} y x) +(LEAQ2 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(off1+2*off2) && sym2 == nil -> + (LEAQ4 [off1+2*off2] {sym1} x y) +(LEAQ4 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(off1+4*off2) && sym2 == nil -> + (LEAQ8 [off1+4*off2] {sym1} x y) +// TODO: more? + // Absorb InvertFlags into branches. 
(LT (InvertFlags cmp) yes no) -> (GT cmp yes no) (GT (InvertFlags cmp) yes no) -> (LT cmp yes no) @@ -1552,60 +1477,65 @@ // Little-endian loads -(ORL x0:(MOVBload [i0] {s} p mem) - sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem))) +(ORL x0:(MOVBload [i0] {s} p0 mem) + sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem))) && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 + && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) + -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem) -(ORQ x0:(MOVBload [i0] {s} p mem) - sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem))) +(ORQ x0:(MOVBload [i0] {s} p0 mem) + sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p1 mem))) && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 + && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) + -> @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem) -(ORL x0:(MOVWload [i0] {s} p mem) - sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem))) +(ORL x0:(MOVWload [i0] {s} p0 mem) + sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p1 mem))) && i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 + && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem) + -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem) -(ORQ x0:(MOVWload [i0] {s} p mem) - sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem))) +(ORQ x0:(MOVWload [i0] {s} p0 mem) + sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p1 mem))) && i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 + && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem) + -> @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem) -(ORQ x0:(MOVLload [i0] {s} p mem) - sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem))) +(ORQ x0:(MOVLload [i0] {s} p0 mem) + sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p1 mem))) && i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 + && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem) + -> @mergePoint(b,x0,x1) (MOVQload [i0] {s} p0 mem) (ORL - s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) + s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p0 mem)) or:(ORL - s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) + s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p1 mem)) y)) && i1 == i0+1 && j1 == j0+8 @@ -1615,14 +1545,15 @@ && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 + && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j0] (MOVWload [i0] {s} p mem)) y) + -> @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j0] (MOVWload [i0] {s} p0 mem)) y) (ORQ - s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) + s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p0 mem)) or:(ORQ - s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) + s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p1 mem)) y)) && i1 == i0+1 && j1 == j0+8 @@ -1632,14 +1563,15 @@ && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 + && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVWload [i0] {s} p mem)) y) + -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVWload [i0] {s} p0 mem)) y) (ORQ - s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) + s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p0 mem)) or:(ORQ - s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) + s0:(SHLQconst [j0] 
x0:(MOVWload [i0] {s} p1 mem))
     y))
  && i1 == i0+2
  && j1 == j0+16
@@ -1649,296 +1581,105 @@
  && s0.Uses == 1
  && s1.Uses == 1
  && or.Uses == 1
+  && same(p0, p1, 1)
  && mergePoint(b,x0,x1,y) != nil
  && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVLload [i0] {s} p mem)) y)
+  -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVLload [i0] {s} p0 mem)) y)
 
 // Little-endian indexed loads
-(ORL x0:(MOVBloadidx1 [i0] {s} p idx mem)
-     sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-  && i1 == i0+1
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVWloadidx1 [i0] {s} p idx mem)
-
-(ORQ x0:(MOVBloadidx1 [i0] {s} p idx mem)
-     sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem)))
-  && i1 == i0+1
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVWloadidx1 [i0] {s} p idx mem)
-
-(ORL x0:(MOVWloadidx1 [i0] {s} p idx mem)
-     sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-  && i1 == i0+2
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
-
-(ORQ x0:(MOVWloadidx1 [i0] {s} p idx mem)
-     sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem)))
-  && i1 == i0+2
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem)
-
-(ORQ x0:(MOVLloadidx1 [i0] {s} p idx mem)
-     sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem)))
-  && i1 == i0+4
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && sh.Uses == 1
-  && mergePoint(b,x0,x1) != nil
-  && clobber(x0, x1, sh)
-  -> @mergePoint(b,x0,x1) (MOVQloadidx1 [i0] {s} p idx mem)
-
-(ORL
-    s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
-    or:(ORL
-        s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
-        y))
-  && i1 == i0+1
-  && j1 == j0+8
-  && j0 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
-
-(ORQ
-    s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem))
-    or:(ORQ
-        s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem))
-        y))
-  && i1 == i0+1
-  && j1 == j0+8
-  && j0 % 16 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y)
-
-(ORQ
-    s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem))
-    or:(ORQ
-        s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem))
-        y))
-  && i1 == i0+2
-  && j1 == j0+16
-  && j0 % 32 == 0
-  && x0.Uses == 1
-  && x1.Uses == 1
-  && s0.Uses == 1
-  && s1.Uses == 1
-  && or.Uses == 1
-  && mergePoint(b,x0,x1,y) != nil
-  && clobber(x0, x1, s0, s1, or)
-  -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y)
+// Move constant offsets from LEAQx up into load. This lets the above combining
+// rules discover indexed load-combining instances.
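+// For example (offsets illustrative only),
+//	(MOVBload [1] {s} (LEAQ1 [4] x y) mem)
+// is rewritten to
+//	(MOVBload [5] {s} (LEAQ1 [0] x y) mem)
+// so byte loads at adjacent offsets can share a single (LEAQ1 [0] x y)
+// pointer and pass the same(p0, p1, 1) checks above.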
+(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) +-> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ1 [0] {s1} x y) mem) +(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) +-> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ2 [0] {s1} x y) mem) +(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) +-> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ4 [0] {s1} x y) mem) +(MOV(B|W|L|Q)load [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) && i1 != 0 && is32Bit(i0+i1) +-> (MOV(B|W|L|Q)load [i0+i1] {s0} (LEAQ8 [0] {s1} x y) mem) + +(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1) +-> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ1 [0] {s1} x y) val mem) +(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1) +-> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ2 [0] {s1} x y) val mem) +(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1) +-> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ4 [0] {s1} x y) val mem) +(MOV(B|W|L|Q)store [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) && i1 != 0 && is32Bit(i0+i1) +-> (MOV(B|W|L|Q)store [i0+i1] {s0} (LEAQ8 [0] {s1} x y) val mem) // Big-endian loads (ORL - x1:(MOVBload [i1] {s} p mem) - sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem))) - && i1 == i0+1 - && x0.Uses == 1 - && x1.Uses == 1 - && sh.Uses == 1 - && mergePoint(b,x0,x1) != nil - && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i0] {s} p mem)) - -(ORQ - x1:(MOVBload [i1] {s} p mem) - sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem))) - && i1 == i0+1 - && x0.Uses == 1 - && x1.Uses == 1 - && sh.Uses == 1 - && mergePoint(b,x0,x1) != nil - && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i0] {s} p mem)) - -(ORL - r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) - sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))) - && i1 == i0+2 - && x0.Uses == 1 - && x1.Uses == 1 - && r0.Uses == 1 - && r1.Uses == 1 - && sh.Uses == 1 - && mergePoint(b,x0,x1) != nil - && clobber(x0, x1, r0, r1, sh) - -> @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i0] {s} p mem)) - -(ORQ - r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) - sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))) - && i1 == i0+2 - && x0.Uses == 1 - && x1.Uses == 1 - && r0.Uses == 1 - && r1.Uses == 1 - && sh.Uses == 1 - && mergePoint(b,x0,x1) != nil - && clobber(x0, x1, r0, r1, sh) - -> @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i0] {s} p mem)) - -(ORQ - r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)) - sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem)))) - && i1 == i0+4 - && x0.Uses == 1 - && x1.Uses == 1 - && r0.Uses == 1 - && r1.Uses == 1 - && sh.Uses == 1 - && mergePoint(b,x0,x1) != nil - && clobber(x0, x1, r0, r1, sh) - -> @mergePoint(b,x0,x1) (BSWAPQ (MOVQload [i0] {s} p mem)) - -(ORL - s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) - or:(ORL - s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) - y)) - && i1 == i0+1 - && j1 == j0-8 - && j1 % 16 == 0 - && x0.Uses == 1 - && x1.Uses == 1 - && s0.Uses == 1 - && s1.Uses == 1 - && or.Uses == 1 - && mergePoint(b,x0,x1,y) != nil - && clobber(x0, x1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j1] (ROLWconst [8] (MOVWload [i0] {s} p mem))) y) - -(ORQ - s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) - or:(ORQ - s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) - y)) - && i1 == i0+1 - && j1 == j0-8 - && j1 % 16 == 0 - && x0.Uses == 1 - && x1.Uses == 1 - && s0.Uses == 1 - && s1.Uses 
== 1 - && or.Uses == 1 - && mergePoint(b,x0,x1,y) != nil - && clobber(x0, x1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (ROLWconst [8] (MOVWload [i0] {s} p mem))) y) - -(ORQ - s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) - or:(ORQ - s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) - y)) - && i1 == i0+2 - && j1 == j0-16 - && j1 % 32 == 0 - && x0.Uses == 1 - && x1.Uses == 1 - && r0.Uses == 1 - && r1.Uses == 1 - && s0.Uses == 1 - && s1.Uses == 1 - && or.Uses == 1 - && mergePoint(b,x0,x1,y) != nil - && clobber(x0, x1, r0, r1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (BSWAPL (MOVLload [i0] {s} p mem))) y) - -// Big-endian indexed loads - -(ORL - x1:(MOVBloadidx1 [i1] {s} p idx mem) - sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem))) + x1:(MOVBload [i1] {s} p0 mem) + sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p1 mem))) && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 + && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWloadidx1 [i0] {s} p idx mem)) + -> @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i0] {s} p0 mem)) (ORQ - x1:(MOVBloadidx1 [i1] {s} p idx mem) - sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem))) + x1:(MOVBload [i1] {s} p0 mem) + sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p1 mem))) && i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 + && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - -> @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWloadidx1 [i0] {s} p idx mem)) + -> @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i0] {s} p0 mem)) (ORL - r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) - sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem)))) + r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem)) + sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem)))) && i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 + && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - -> @mergePoint(b,x0,x1) (BSWAPL (MOVLloadidx1 [i0] {s} p idx mem)) + -> @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i0] {s} p0 mem)) (ORQ - r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) - sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem)))) + r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem)) + sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem)))) && i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 + && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - -> @mergePoint(b,x0,x1) (BSWAPL (MOVLloadidx1 [i0] {s} p idx mem)) + -> @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i0] {s} p0 mem)) (ORQ - r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)) - sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem)))) + r1:(BSWAPL x1:(MOVLload [i1] {s} p0 mem)) + sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p1 mem)))) && i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 + && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - -> @mergePoint(b,x0,x1) (BSWAPQ (MOVQloadidx1 [i0] {s} p idx mem)) + -> @mergePoint(b,x0,x1) (BSWAPQ (MOVQload [i0] {s} p0 mem)) (ORL - s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) + s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p0 mem)) or:(ORL - s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) + s1:(SHLLconst [j1] 
x1:(MOVBload [i1] {s} p1 mem)) y)) && i1 == i0+1 && j1 == j0-8 @@ -1948,14 +1689,15 @@ && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 + && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j1] (ROLWconst [8] (MOVWloadidx1 [i0] {s} p idx mem))) y) + -> @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j1] (ROLWconst [8] (MOVWload [i0] {s} p0 mem))) y) (ORQ - s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) + s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p0 mem)) or:(ORQ - s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) + s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p1 mem)) y)) && i1 == i0+1 && j1 == j0-8 @@ -1965,14 +1707,15 @@ && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 + && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (ROLWconst [8] (MOVWloadidx1 [i0] {s} p idx mem))) y) + -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (ROLWconst [8] (MOVWload [i0] {s} p0 mem))) y) (ORQ - s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) + s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p0 mem))) or:(ORQ - s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) + s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p1 mem))) y)) && i1 == i0+2 && j1 == j0-16 @@ -1984,70 +1727,41 @@ && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 + && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or) - -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (BSWAPL (MOVLloadidx1 [i0] {s} p idx mem))) y) + -> @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (BSWAPL (MOVLload [i0] {s} p0 mem))) y) // Combine 2 byte stores + shift into rolw 8 + word store -(MOVBstore [i] {s} p w - x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem)) +(MOVBstore [i] {s} p1 w + x0:(MOVBstore [i-1] {s} p0 (SHRWconst [8] w) mem)) && x0.Uses == 1 + && same(p0, p1, 1) && clobber(x0) - -> (MOVWstore [i-1] {s} p (ROLWconst [8] w) mem) - -(MOVBstoreidx1 [i] {s} p idx w - x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem)) - && x0.Uses == 1 - && clobber(x0) - -> (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst [8] w) mem) + -> (MOVWstore [i-1] {s} p0 (ROLWconst [8] w) mem) // Combine stores + shifts into bswap and larger (unaligned) stores -(MOVBstore [i] {s} p w - x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) - x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) - x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem)))) - && x0.Uses == 1 - && x1.Uses == 1 - && x2.Uses == 1 - && clobber(x0, x1, x2) - -> (MOVLstore [i-3] {s} p (BSWAPL w) mem) - -(MOVBstoreidx1 [i] {s} p idx w - x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) - x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) - x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem)))) +(MOVBstore [i] {s} p3 w + x2:(MOVBstore [i-1] {s} p2 (SHRLconst [8] w) + x1:(MOVBstore [i-2] {s} p1 (SHRLconst [16] w) + x0:(MOVBstore [i-3] {s} p0 (SHRLconst [24] w) mem)))) && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 + && same(p0, p1, 1) + && same(p1, p2, 1) + && same(p2, p3, 1) && clobber(x0, x1, x2) - -> (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL w) mem) - -(MOVBstore [i] {s} p w - x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w) - x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w) - x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w) - x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w) - x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w) - x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w) - x0:(MOVBstore [i-7] {s} p 
(SHRQconst [56] w) mem)))))))) - && x0.Uses == 1 - && x1.Uses == 1 - && x2.Uses == 1 - && x3.Uses == 1 - && x4.Uses == 1 - && x5.Uses == 1 - && x6.Uses == 1 - && clobber(x0, x1, x2, x3, x4, x5, x6) - -> (MOVQstore [i-7] {s} p (BSWAPQ w) mem) - -(MOVBstoreidx1 [i] {s} p idx w - x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) - x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) - x4:(MOVBstoreidx1 [i-3] {s} p idx (SHRQconst [24] w) - x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) - x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) - x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) - x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem)))))))) + -> (MOVLstore [i-3] {s} p0 (BSWAPL w) mem) + +(MOVBstore [i] {s} p7 w + x6:(MOVBstore [i-1] {s} p6 (SHRQconst [8] w) + x5:(MOVBstore [i-2] {s} p5 (SHRQconst [16] w) + x4:(MOVBstore [i-3] {s} p4 (SHRQconst [24] w) + x3:(MOVBstore [i-4] {s} p3 (SHRQconst [32] w) + x2:(MOVBstore [i-5] {s} p2 (SHRQconst [40] w) + x1:(MOVBstore [i-6] {s} p1 (SHRQconst [48] w) + x0:(MOVBstore [i-7] {s} p0 (SHRQconst [56] w) mem)))))))) && x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 @@ -2055,147 +1769,99 @@ && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 + && same(p0, p1, 1) + && same(p1, p2, 1) + && same(p2, p3, 1) + && same(p3, p4, 1) + && same(p4, p5, 1) + && same(p5, p6, 1) + && same(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6) - -> (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ w) mem) + -> (MOVQstore [i-7] {s} p0 (BSWAPQ w) mem) // Combine constant stores into larger (unaligned) stores. -(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem)) +(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem)) && x.Uses == 1 + && same(p0, p1, 1) && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) - -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) -(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem)) + -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) +(MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem)) && x.Uses == 1 + && same(p0, p1, 1) && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) - -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) -(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem)) + -> (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) +(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem)) && x.Uses == 1 + && same(p0, p1, 1) && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) - -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) -(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem)) + -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) +(MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem)) && x.Uses == 1 + && same(p0, p1, 1) && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) - -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) -(MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem)) + -> (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) +(MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem)) && x.Uses == 1 + && same(p0, p1, 1) 
&& ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) - -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) -(MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem)) + -> (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) +(MOVLstoreconst [a] {s} p1 x:(MOVLstoreconst [c] {s} p0 mem)) && x.Uses == 1 + && same(p0, p1, 1) && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) - -> (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) -(MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem)) + -> (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) +(MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [c2] {s} p0 mem)) && config.useSSE && x.Uses == 1 + && same(p0, p1, 1) && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x) - -> (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem) - -(MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem)) - && x.Uses == 1 - && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() - && clobber(x) - -> (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem) -(MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem)) - && x.Uses == 1 - && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() - && clobber(x) - -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem) -(MOVLstoreconstidx1 [c] {s} p i x:(MOVLstoreconstidx1 [a] {s} p i mem)) - && x.Uses == 1 - && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() - && clobber(x) - -> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p i (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) - -(MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem)) - && x.Uses == 1 - && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() - && clobber(x) - -> (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLQconst [1] i) mem) -(MOVLstoreconstidx4 [c] {s} p i x:(MOVLstoreconstidx4 [a] {s} p i mem)) - && x.Uses == 1 - && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() - && clobber(x) - -> (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) + -> (MOVOstore [ValAndOff(c2).Off()] {s} p0 (MOVOconst [0]) mem) // Combine stores into larger (unaligned) stores. 
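+// For example (an illustrative case of the byte-store rule below; names as
+// in the rule): storing the low byte of w at i-1 and the next byte (w>>8)
+// at i is one little-endian 16-bit store, so
+//	(MOVBstore [i] {s} p1 (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem))
+// with x.Uses == 1 and same(p0, p1, 1) becomes
+//	(MOVWstore [i-1] {s} p0 w mem)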
-(MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem)) - && x.Uses == 1 - && clobber(x) - -> (MOVWstore [i-1] {s} p w mem) -(MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHR(W|L|Q)const [8] w) mem)) - && x.Uses == 1 - && clobber(x) - -> (MOVWstore [i] {s} p w mem) -(MOVBstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p w0:(SHR(L|Q)const [j-8] w) mem)) - && x.Uses == 1 - && clobber(x) - -> (MOVWstore [i-1] {s} p w0 mem) -(MOVWstore [i] {s} p (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p w mem)) - && x.Uses == 1 - && clobber(x) - -> (MOVLstore [i-2] {s} p w mem) -(MOVWstore [i] {s} p (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p w0:(SHR(L|Q)const [j-16] w) mem)) - && x.Uses == 1 - && clobber(x) - -> (MOVLstore [i-2] {s} p w0 mem) -(MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem)) - && x.Uses == 1 - && clobber(x) - -> (MOVQstore [i-4] {s} p w mem) -(MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem)) - && x.Uses == 1 - && clobber(x) - -> (MOVQstore [i-4] {s} p w0 mem) - -(MOVBstoreidx1 [i] {s} p idx (SHR(W|L|Q)const [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) - && x.Uses == 1 - && clobber(x) - -> (MOVWstoreidx1 [i-1] {s} p idx w mem) -(MOVBstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHR(L|Q)const [j-8] w) mem)) - && x.Uses == 1 - && clobber(x) - -> (MOVWstoreidx1 [i-1] {s} p idx w0 mem) -(MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem)) +(MOVBstore [i] {s} p1 (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p0 w mem)) && x.Uses == 1 + && same(p0, p1, 1) && clobber(x) - -> (MOVLstoreidx1 [i-2] {s} p idx w mem) -(MOVWstoreidx1 [i] {s} p idx (SHR(L|Q)const [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHR(L|Q)const [j-16] w) mem)) + -> (MOVWstore [i-1] {s} p0 w mem) +(MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHR(W|L|Q)const [8] w) mem)) && x.Uses == 1 + && same(p0, p1, 1) && clobber(x) - -> (MOVLstoreidx1 [i-2] {s} p idx w0 mem) -(MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem)) + -> (MOVWstore [i] {s} p0 w mem) +(MOVBstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHR(L|Q)const [j-8] w) mem)) && x.Uses == 1 + && same(p0, p1, 1) && clobber(x) - -> (MOVQstoreidx1 [i-4] {s} p idx w mem) -(MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem)) + -> (MOVWstore [i-1] {s} p0 w0 mem) +(MOVWstore [i] {s} p1 (SHR(L|Q)const [16] w) x:(MOVWstore [i-2] {s} p0 w mem)) && x.Uses == 1 + && same(p0, p1, 1) && clobber(x) - -> (MOVQstoreidx1 [i-4] {s} p idx w0 mem) - -(MOVWstoreidx2 [i] {s} p idx (SHR(L|Q)const [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem)) + -> (MOVLstore [i-2] {s} p0 w mem) +(MOVWstore [i] {s} p1 (SHR(L|Q)const [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHR(L|Q)const [j-16] w) mem)) && x.Uses == 1 + && same(p0, p1, 1) && clobber(x) - -> (MOVLstoreidx1 [i-2] {s} p (SHLQconst [1] idx) w mem) -(MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem)) + -> (MOVLstore [i-2] {s} p0 w0 mem) +(MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p0 w mem)) && x.Uses == 1 + && same(p0, p1, 1) && clobber(x) - -> (MOVLstoreidx1 [i-2] {s} p (SHLQconst [1] idx) w0 mem) -(MOVLstoreidx4 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx4 [i-4] {s} p idx w mem)) + -> (MOVQstore [i-4] {s} p0 w mem) +(MOVLstore [i] {s} p1 (SHRQconst [j] w) 
x:(MOVLstore [i-4] {s} p0 w0:(SHRQconst [j-32] w) mem)) && x.Uses == 1 + && same(p0, p1, 1) && clobber(x) - -> (MOVQstoreidx1 [i-4] {s} p (SHLQconst [2] idx) w mem) -(MOVLstoreidx4 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx4 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem)) - && x.Uses == 1 - && clobber(x) - -> (MOVQstoreidx1 [i-4] {s} p (SHLQconst [2] idx) w0 mem) + -> (MOVQstore [i-4] {s} p0 w0 mem) (MOVBstore [i] {s} p x1:(MOVBload [j] {s2} p2 mem) @@ -2320,41 +1986,6 @@ (BSFQ (ORQconst [1<<8] (MOVBQZX x))) -> (BSFQ (ORQconst [1<<8] x)) (BSFQ (ORQconst [1<<16] (MOVWQZX x))) -> (BSFQ (ORQconst [1<<16] x)) -// Simplify indexed loads/stores -(MOVBstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVBstore [i+c] {s} p w mem) -(MOVWstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVWstore [i+c] {s} p w mem) -(MOVLstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVLstore [i+c] {s} p w mem) -(MOVQstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVQstore [i+c] {s} p w mem) -(MOVWstoreidx2 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+2*c) -> (MOVWstore [i+2*c] {s} p w mem) -(MOVLstoreidx4 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+4*c) -> (MOVLstore [i+4*c] {s} p w mem) -(MOVLstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVLstore [i+8*c] {s} p w mem) -(MOVQstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVQstore [i+8*c] {s} p w mem) -(MOVSSstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVSSstore [i+c] {s} p w mem) -(MOVSSstoreidx4 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+4*c) -> (MOVSSstore [i+4*c] {s} p w mem) -(MOVSDstoreidx1 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+c) -> (MOVSDstore [i+c] {s} p w mem) -(MOVSDstoreidx8 [i] {s} p (MOVQconst [c]) w mem) && is32Bit(i+8*c) -> (MOVSDstore [i+8*c] {s} p w mem) -(MOVBloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVBload [i+c] {s} p mem) -(MOVWloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVWload [i+c] {s} p mem) -(MOVLloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVLload [i+c] {s} p mem) -(MOVQloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVQload [i+c] {s} p mem) -(MOVWloadidx2 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+2*c) -> (MOVWload [i+2*c] {s} p mem) -(MOVLloadidx4 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+4*c) -> (MOVLload [i+4*c] {s} p mem) -(MOVLloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVLload [i+8*c] {s} p mem) -(MOVQloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVQload [i+8*c] {s} p mem) -(MOVSSloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVSSload [i+c] {s} p mem) -(MOVSSloadidx4 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+4*c) -> (MOVSSload [i+4*c] {s} p mem) -(MOVSDloadidx1 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+c) -> (MOVSDload [i+c] {s} p mem) -(MOVSDloadidx8 [i] {s} p (MOVQconst [c]) mem) && is32Bit(i+8*c) -> (MOVSDload [i+8*c] {s} p mem) - -// Combine consts into storeidx. -// Note that when c == 0, it takes more bytes to encode -// the immediate $0 than to zero a register and use it. -// We do the rewrite anyway, to minimize register pressure. 
-(MOVBstoreidx1 [off] {s} ptr idx (MOVLconst [c]) mem) && validValAndOff(int64(int8(c)), off) -> (MOVBstoreconstidx1 [makeValAndOff(int64(int8(c)), off)] {s} ptr idx mem)
-(MOVWstoreidx(1|2) [off] {s} ptr idx (MOVLconst [c]) mem) && validValAndOff(int64(int16(c)), off) -> (MOVWstoreconstidx(1|2) [makeValAndOff(int64(int16(c)), off)] {s} ptr idx mem)
-(MOVLstoreidx(1|4) [off] {s} ptr idx (MOVQconst [c]) mem) && validValAndOff(int64(int32(c)), off) -> (MOVLstoreconstidx(1|4) [makeValAndOff(int64(int32(c)), off)] {s} ptr idx mem)
-(MOVQstoreidx(1|8) [off] {s} ptr idx (MOVQconst [c]) mem) && validValAndOff(c, off) -> (MOVQstoreconstidx(1|8) [makeValAndOff(c, off)] {s} ptr idx mem)
-
 // Redundant sign/zero extensions
 // Note: see issue 21963. We have to make sure we use the right type on
 // the resulting extension (the outer type, not the inner type).
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 238e243096..b3e7d34779 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -1247,3 +1247,43 @@ func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
 	copy(buf, src)
 	return byteorder.Uint64(buf)
 }
+
+// same reports whether x and y are the same value.
+// It checks to a maximum depth of depth, so it may report
+// a false negative.
+func same(x, y *Value, depth int) bool {
+	if x == y {
+		return true
+	}
+	if depth <= 0 {
+		return false
+	}
+	if x.Op != y.Op || x.Aux != y.Aux || x.AuxInt != y.AuxInt {
+		return false
+	}
+	if len(x.Args) != len(y.Args) {
+		return false
+	}
+	if opcodeTable[x.Op].commutative {
+		// Check exchanged ordering first.
+		for i, a := range x.Args {
+			j := i
+			if j < 2 {
+				j ^= 1
+			}
+			b := y.Args[j]
+			if !same(a, b, depth-1) {
+				goto checkNormalOrder
+			}
+		}
+		return true
+	checkNormalOrder:
+	}
+	for i, a := range x.Args {
+		b := y.Args[i]
+		if !same(a, b, depth-1) {
+			return false
+		}
+	}
+	return true
+}
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index c37bae2c22..bd1f4c08e2 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -246,16 +246,10 @@ func rewriteValueAMD64(v *Value) bool {
 		return rewriteValueAMD64_OpAMD64MOVBatomicload(v)
 	case OpAMD64MOVBload:
 		return rewriteValueAMD64_OpAMD64MOVBload(v)
-	case OpAMD64MOVBloadidx1:
-		return rewriteValueAMD64_OpAMD64MOVBloadidx1(v)
 	case OpAMD64MOVBstore:
 		return rewriteValueAMD64_OpAMD64MOVBstore(v)
 	case OpAMD64MOVBstoreconst:
 		return rewriteValueAMD64_OpAMD64MOVBstoreconst(v)
-	case OpAMD64MOVBstoreconstidx1:
-		return rewriteValueAMD64_OpAMD64MOVBstoreconstidx1(v)
-	case OpAMD64MOVBstoreidx1:
-		return rewriteValueAMD64_OpAMD64MOVBstoreidx1(v)
 	case OpAMD64MOVLQSX:
 		return rewriteValueAMD64_OpAMD64MOVLQSX(v)
 	case OpAMD64MOVLQSXload:
@@ -270,26 +264,10 @@ func rewriteValueAMD64(v *Value) bool {
 		return rewriteValueAMD64_OpAMD64MOVLi2f(v)
 	case OpAMD64MOVLload:
 		return rewriteValueAMD64_OpAMD64MOVLload(v)
-	case OpAMD64MOVLloadidx1:
-		return rewriteValueAMD64_OpAMD64MOVLloadidx1(v)
-	case OpAMD64MOVLloadidx4:
-		return rewriteValueAMD64_OpAMD64MOVLloadidx4(v)
-	case OpAMD64MOVLloadidx8:
-		return rewriteValueAMD64_OpAMD64MOVLloadidx8(v)
 	case OpAMD64MOVLstore:
 		return rewriteValueAMD64_OpAMD64MOVLstore(v)
 	case OpAMD64MOVLstoreconst:
 		return rewriteValueAMD64_OpAMD64MOVLstoreconst(v)
-	case OpAMD64MOVLstoreconstidx1:
-		return rewriteValueAMD64_OpAMD64MOVLstoreconstidx1(v)
-	case OpAMD64MOVLstoreconstidx4:
-		
return rewriteValueAMD64_OpAMD64MOVLstoreconstidx4(v) - case OpAMD64MOVLstoreidx1: - return rewriteValueAMD64_OpAMD64MOVLstoreidx1(v) - case OpAMD64MOVLstoreidx4: - return rewriteValueAMD64_OpAMD64MOVLstoreidx4(v) - case OpAMD64MOVLstoreidx8: - return rewriteValueAMD64_OpAMD64MOVLstoreidx8(v) case OpAMD64MOVOload: return rewriteValueAMD64_OpAMD64MOVOload(v) case OpAMD64MOVOstore: @@ -302,46 +280,18 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64MOVQi2f(v) case OpAMD64MOVQload: return rewriteValueAMD64_OpAMD64MOVQload(v) - case OpAMD64MOVQloadidx1: - return rewriteValueAMD64_OpAMD64MOVQloadidx1(v) - case OpAMD64MOVQloadidx8: - return rewriteValueAMD64_OpAMD64MOVQloadidx8(v) case OpAMD64MOVQstore: return rewriteValueAMD64_OpAMD64MOVQstore(v) case OpAMD64MOVQstoreconst: return rewriteValueAMD64_OpAMD64MOVQstoreconst(v) - case OpAMD64MOVQstoreconstidx1: - return rewriteValueAMD64_OpAMD64MOVQstoreconstidx1(v) - case OpAMD64MOVQstoreconstidx8: - return rewriteValueAMD64_OpAMD64MOVQstoreconstidx8(v) - case OpAMD64MOVQstoreidx1: - return rewriteValueAMD64_OpAMD64MOVQstoreidx1(v) - case OpAMD64MOVQstoreidx8: - return rewriteValueAMD64_OpAMD64MOVQstoreidx8(v) case OpAMD64MOVSDload: return rewriteValueAMD64_OpAMD64MOVSDload(v) - case OpAMD64MOVSDloadidx1: - return rewriteValueAMD64_OpAMD64MOVSDloadidx1(v) - case OpAMD64MOVSDloadidx8: - return rewriteValueAMD64_OpAMD64MOVSDloadidx8(v) case OpAMD64MOVSDstore: return rewriteValueAMD64_OpAMD64MOVSDstore(v) - case OpAMD64MOVSDstoreidx1: - return rewriteValueAMD64_OpAMD64MOVSDstoreidx1(v) - case OpAMD64MOVSDstoreidx8: - return rewriteValueAMD64_OpAMD64MOVSDstoreidx8(v) case OpAMD64MOVSSload: return rewriteValueAMD64_OpAMD64MOVSSload(v) - case OpAMD64MOVSSloadidx1: - return rewriteValueAMD64_OpAMD64MOVSSloadidx1(v) - case OpAMD64MOVSSloadidx4: - return rewriteValueAMD64_OpAMD64MOVSSloadidx4(v) case OpAMD64MOVSSstore: return rewriteValueAMD64_OpAMD64MOVSSstore(v) - case OpAMD64MOVSSstoreidx1: - return rewriteValueAMD64_OpAMD64MOVSSstoreidx1(v) - case OpAMD64MOVSSstoreidx4: - return rewriteValueAMD64_OpAMD64MOVSSstoreidx4(v) case OpAMD64MOVWQSX: return rewriteValueAMD64_OpAMD64MOVWQSX(v) case OpAMD64MOVWQSXload: @@ -350,22 +300,10 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64MOVWQZX(v) case OpAMD64MOVWload: return rewriteValueAMD64_OpAMD64MOVWload(v) - case OpAMD64MOVWloadidx1: - return rewriteValueAMD64_OpAMD64MOVWloadidx1(v) - case OpAMD64MOVWloadidx2: - return rewriteValueAMD64_OpAMD64MOVWloadidx2(v) case OpAMD64MOVWstore: return rewriteValueAMD64_OpAMD64MOVWstore(v) case OpAMD64MOVWstoreconst: return rewriteValueAMD64_OpAMD64MOVWstoreconst(v) - case OpAMD64MOVWstoreconstidx1: - return rewriteValueAMD64_OpAMD64MOVWstoreconstidx1(v) - case OpAMD64MOVWstoreconstidx2: - return rewriteValueAMD64_OpAMD64MOVWstoreconstidx2(v) - case OpAMD64MOVWstoreidx1: - return rewriteValueAMD64_OpAMD64MOVWstoreidx1(v) - case OpAMD64MOVWstoreidx2: - return rewriteValueAMD64_OpAMD64MOVWstoreidx2(v) case OpAMD64MULL: return rewriteValueAMD64_OpAMD64MULL(v) case OpAMD64MULLconst: @@ -9347,6 +9285,64 @@ func rewriteValueAMD64_OpAMD64LEAQ1(v *Value) bool { } break } + // match: (LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} x y) + for { + off1 := v.AuxInt + sym1 := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64LEAQ1 { + continue + } + off2 := v_1.AuxInt + sym2 
:= v_1.Aux + y := v_1.Args[1] + if y != v_1.Args[0] || !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + continue + } + v.reset(OpAMD64LEAQ2) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(x, y) + return true + } + break + } + // match: (LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} x y)) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} y x) + for { + off1 := v.AuxInt + sym1 := v.Aux + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64LEAQ1 { + continue + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + v_1_1 := v_1.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 { + if x != v_1_0 { + continue + } + y := v_1_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + continue + } + v.reset(OpAMD64LEAQ2) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(y, x) + return true + } + } + break + } // match: (LEAQ1 [0] x y) // cond: v.Aux == nil // result: (ADDQ x y) @@ -9464,6 +9460,28 @@ func rewriteValueAMD64_OpAMD64LEAQ2(v *Value) bool { v.AddArg2(x, y) return true } + // match: (LEAQ2 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) + // cond: is32Bit(off1+2*off2) && sym2 == nil + // result: (LEAQ4 [off1+2*off2] {sym1} x y) + for { + off1 := v.AuxInt + sym1 := v.Aux + x := v_0 + if v_1.Op != OpAMD64LEAQ1 { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + y := v_1.Args[1] + if y != v_1.Args[0] || !(is32Bit(off1+2*off2) && sym2 == nil) { + break + } + v.reset(OpAMD64LEAQ4) + v.AuxInt = off1 + 2*off2 + v.Aux = sym1 + v.AddArg2(x, y) + return true + } return false } func rewriteValueAMD64_OpAMD64LEAQ4(v *Value) bool { @@ -9549,6 +9567,28 @@ func rewriteValueAMD64_OpAMD64LEAQ4(v *Value) bool { v.AddArg2(x, y) return true } + // match: (LEAQ4 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) + // cond: is32Bit(off1+4*off2) && sym2 == nil + // result: (LEAQ8 [off1+4*off2] {sym1} x y) + for { + off1 := v.AuxInt + sym1 := v.Aux + x := v_0 + if v_1.Op != OpAMD64LEAQ1 { + break + } + off2 := v_1.AuxInt + sym2 := v_1.Aux + y := v_1.Args[1] + if y != v_1.Args[0] || !(is32Bit(off1+4*off2) && sym2 == nil) { + break + } + v.reset(OpAMD64LEAQ8) + v.AuxInt = off1 + 4*off2 + v.Aux = sym1 + v.AddArg2(x, y) + return true + } return false } func rewriteValueAMD64_OpAMD64LEAQ8(v *Value) bool { @@ -9899,30 +9939,6 @@ func rewriteValueAMD64_OpAMD64MOVBQZX(v *Value) bool { v.copyOf(x) return true } - // match: (MOVBQZX x:(MOVBloadidx1 [off] {sym} ptr idx mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVBloadidx1 [off] {sym} ptr idx mem) - for { - x := v_0 - if x.Op != OpAMD64MOVBloadidx1 { - break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[2] - ptr := x.Args[0] - idx := x.Args[1] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := b.NewValue0(v.Pos, OpAMD64MOVBloadidx1, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg3(ptr, idx, mem) - return true - } // match: (MOVBQZX (ANDLconst [c] x)) // result: (ANDLconst [c & 0xff] x) for { @@ -10000,6 +10016,7 @@ func rewriteValueAMD64_OpAMD64MOVBatomicload(v *Value) bool { func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] + b := v.Block // match: (MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) // result: (MOVBQZX x) @@ -10064,55 +10081,117 @@ func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool { 
v.AddArg2(base, mem) return true } - // match: (MOVBload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVBloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) + // match: (MOVBload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVBload [i0+i1] {s0} (LEAQ1 [0] {s1} x y) mem) for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ1 { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ1 { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + if !(i1 != 0 && is32Bit(i0+i1)) { break } - v.reset(OpAMD64MOVBloadidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) + v.reset(OpAMD64MOVBload) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg2(v0, mem) return true } - // match: (MOVBload [off] {sym} (ADDQ ptr idx) mem) - // cond: ptr.Op != OpSB - // result: (MOVBloadidx1 [off] {sym} ptr idx mem) + // match: (MOVBload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVBload [i0+i1] {s0} (LEAQ2 [0] {s1} x y) mem) for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQ { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ2 { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - mem := v_1 - if !(ptr.Op != OpSB) { - continue - } - v.reset(OpAMD64MOVBloadidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + mem := v_1 + if !(i1 != 0 && is32Bit(i0+i1)) { + break } - break + v.reset(OpAMD64MOVBload) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg2(v0, mem) + return true + } + // match: (MOVBload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVBload [i0+i1] {s0} (LEAQ4 [0] {s1} x y) mem) + for { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ4 { + break + } + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + mem := v_1 + if !(i1 != 0 && is32Bit(i0+i1)) { + break + } + v.reset(OpAMD64MOVBload) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg2(v0, mem) + return true + } + // match: (MOVBload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVBload [i0+i1] {s0} (LEAQ8 [0] {s1} x y) mem) + for { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ8 { + break + } + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + mem := v_1 + if !(i1 != 0 && is32Bit(i0+i1)) { + break + } + v.reset(OpAMD64MOVBload) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg2(v0, mem) + return true } // match: (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem) // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) @@ -10172,86 +10251,6 @@ func rewriteValueAMD64_OpAMD64MOVBload(v *Value) bool { } return 
false } -func rewriteValueAMD64_OpAMD64MOVBloadidx1(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVBloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem) - // cond: is32Bit(c+d) - // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64ADDQconst { - continue - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(is32Bit(c + d)) { - continue - } - v.reset(OpAMD64MOVBloadidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVBloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem) - // cond: is32Bit(c+d) - // result: (MOVBloadidx1 [c+d] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - continue - } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(is32Bit(c + d)) { - continue - } - v.reset(OpAMD64MOVBloadidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVBloadidx1 [i] {s} p (MOVQconst [c]) mem) - // cond: is32Bit(i+c) - // result: (MOVBload [i+c] {s} p mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - if v_1.Op != OpAMD64MOVQconst { - continue - } - c := v_1.AuxInt - mem := v_2 - if !(is32Bit(i + c)) { - continue - } - v.reset(OpAMD64MOVBload) - v.AuxInt = i + c - v.Aux = s - v.AddArg2(p, mem) - return true - } - break - } - return false -} func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -10599,103 +10598,163 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { v.AddArg3(base, val, mem) return true } - // match: (MOVBstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVBstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) + // match: (MOVBstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVBstore [i0+i1] {s0} (LEAQ1 [0] {s1} x y) val mem) for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ1 { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ1 { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] val := v_1 mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + if !(i1 != 0 && is32Bit(i0+i1)) { break } - v.reset(OpAMD64MOVBstoreidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) + v.reset(OpAMD64MOVBstore) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg3(v0, val, mem) return true } - // match: (MOVBstore [off] {sym} (ADDQ ptr idx) val mem) - // cond: ptr.Op != OpSB - // result: (MOVBstoreidx1 [off] {sym} ptr idx val mem) + // match: (MOVBstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVBstore [i0+i1] {s0} (LEAQ2 [0] {s1} x y) val mem) for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQ { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ2 { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 
{ - ptr := v_0_0 - idx := v_0_1 - val := v_1 - mem := v_2 - if !(ptr.Op != OpSB) { - continue - } - v.reset(OpAMD64MOVBstoreidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + val := v_1 + mem := v_2 + if !(i1 != 0 && is32Bit(i0+i1)) { + break } - break + v.reset(OpAMD64MOVBstore) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg3(v0, val, mem) + return true } - // match: (MOVBstore [i] {s} p w x0:(MOVBstore [i-1] {s} p (SHRWconst [8] w) mem)) - // cond: x0.Uses == 1 && clobber(x0) - // result: (MOVWstore [i-1] {s} p (ROLWconst [8] w) mem) + // match: (MOVBstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVBstore [i0+i1] {s0} (LEAQ4 [0] {s1} x y) val mem) for { - i := v.AuxInt - s := v.Aux - p := v_0 - w := v_1 - x0 := v_2 - if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i-1 || x0.Aux != s { - break - } - mem := x0.Args[2] - if p != x0.Args[0] { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ4 { break } - x0_1 := x0.Args[1] - if x0_1.Op != OpAMD64SHRWconst || x0_1.AuxInt != 8 || w != x0_1.Args[0] || !(x0.Uses == 1 && clobber(x0)) { + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + val := v_1 + mem := v_2 + if !(i1 != 0 && is32Bit(i0+i1)) { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = i - 1 + v.reset(OpAMD64MOVBstore) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg3(v0, val, mem) + return true + } + // match: (MOVBstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVBstore [i0+i1] {s0} (LEAQ8 [0] {s1} x y) val mem) + for { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ8 { + break + } + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + val := v_1 + mem := v_2 + if !(i1 != 0 && is32Bit(i0+i1)) { + break + } + v.reset(OpAMD64MOVBstore) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg3(v0, val, mem) + return true + } + // match: (MOVBstore [i] {s} p1 w x0:(MOVBstore [i-1] {s} p0 (SHRWconst [8] w) mem)) + // cond: x0.Uses == 1 && same(p0, p1, 1) && clobber(x0) + // result: (MOVWstore [i-1] {s} p0 (ROLWconst [8] w) mem) + for { + i := v.AuxInt + s := v.Aux + p1 := v_0 + w := v_1 + x0 := v_2 + if x0.Op != OpAMD64MOVBstore || x0.AuxInt != i-1 || x0.Aux != s { + break + } + mem := x0.Args[2] + p0 := x0.Args[0] + x0_1 := x0.Args[1] + if x0_1.Op != OpAMD64SHRWconst || x0_1.AuxInt != 8 || w != x0_1.Args[0] || !(x0.Uses == 1 && same(p0, p1, 1) && clobber(x0)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i - 1 v.Aux = s v0 := b.NewValue0(x0.Pos, OpAMD64ROLWconst, w.Type) v0.AuxInt = 8 v0.AddArg(w) - v.AddArg3(p, v0, mem) + v.AddArg3(p0, v0, mem) return true } - // match: (MOVBstore [i] {s} p w x2:(MOVBstore [i-1] {s} p (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p (SHRLconst [24] w) mem)))) - // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2) - // result: (MOVLstore [i-3] {s} p (BSWAPL w) mem) + // match: (MOVBstore [i] {s} p3 w x2:(MOVBstore [i-1] {s} p2 (SHRLconst [8] w) x1:(MOVBstore [i-2] {s} p1 (SHRLconst [16] w) x0:(MOVBstore [i-3] {s} p0 (SHRLconst [24] w) mem)))) + // cond: 
x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && clobber(x0, x1, x2) + // result: (MOVLstore [i-3] {s} p0 (BSWAPL w) mem) for { i := v.AuxInt s := v.Aux - p := v_0 + p3 := v_0 w := v_1 x2 := v_2 if x2.Op != OpAMD64MOVBstore || x2.AuxInt != i-1 || x2.Aux != s { break } _ = x2.Args[2] - if p != x2.Args[0] { - break - } + p2 := x2.Args[0] x2_1 := x2.Args[1] if x2_1.Op != OpAMD64SHRLconst || x2_1.AuxInt != 8 || w != x2_1.Args[0] { break @@ -10705,9 +10764,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } _ = x1.Args[2] - if p != x1.Args[0] { - break - } + p1 := x1.Args[0] x1_1 := x1.Args[1] if x1_1.Op != OpAMD64SHRLconst || x1_1.AuxInt != 16 || w != x1_1.Args[0] { break @@ -10717,11 +10774,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } mem := x0.Args[2] - if p != x0.Args[0] { - break - } + p0 := x0.Args[0] x0_1 := x0.Args[1] - if x0_1.Op != OpAMD64SHRLconst || x0_1.AuxInt != 24 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)) { + if x0_1.Op != OpAMD64SHRLconst || x0_1.AuxInt != 24 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && clobber(x0, x1, x2)) { break } v.reset(OpAMD64MOVLstore) @@ -10729,25 +10784,23 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { v.Aux = s v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, w.Type) v0.AddArg(w) - v.AddArg3(p, v0, mem) + v.AddArg3(p0, v0, mem) return true } - // match: (MOVBstore [i] {s} p w x6:(MOVBstore [i-1] {s} p (SHRQconst [8] w) x5:(MOVBstore [i-2] {s} p (SHRQconst [16] w) x4:(MOVBstore [i-3] {s} p (SHRQconst [24] w) x3:(MOVBstore [i-4] {s} p (SHRQconst [32] w) x2:(MOVBstore [i-5] {s} p (SHRQconst [40] w) x1:(MOVBstore [i-6] {s} p (SHRQconst [48] w) x0:(MOVBstore [i-7] {s} p (SHRQconst [56] w) mem)))))))) - // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6) - // result: (MOVQstore [i-7] {s} p (BSWAPQ w) mem) + // match: (MOVBstore [i] {s} p7 w x6:(MOVBstore [i-1] {s} p6 (SHRQconst [8] w) x5:(MOVBstore [i-2] {s} p5 (SHRQconst [16] w) x4:(MOVBstore [i-3] {s} p4 (SHRQconst [24] w) x3:(MOVBstore [i-4] {s} p3 (SHRQconst [32] w) x2:(MOVBstore [i-5] {s} p2 (SHRQconst [40] w) x1:(MOVBstore [i-6] {s} p1 (SHRQconst [48] w) x0:(MOVBstore [i-7] {s} p0 (SHRQconst [56] w) mem)))))))) + // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && same(p3, p4, 1) && same(p4, p5, 1) && same(p5, p6, 1) && same(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6) + // result: (MOVQstore [i-7] {s} p0 (BSWAPQ w) mem) for { i := v.AuxInt s := v.Aux - p := v_0 + p7 := v_0 w := v_1 x6 := v_2 if x6.Op != OpAMD64MOVBstore || x6.AuxInt != i-1 || x6.Aux != s { break } _ = x6.Args[2] - if p != x6.Args[0] { - break - } + p6 := x6.Args[0] x6_1 := x6.Args[1] if x6_1.Op != OpAMD64SHRQconst || x6_1.AuxInt != 8 || w != x6_1.Args[0] { break @@ -10757,9 +10810,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } _ = x5.Args[2] - if p != x5.Args[0] { - break - } + p5 := x5.Args[0] x5_1 := x5.Args[1] if x5_1.Op != OpAMD64SHRQconst || x5_1.AuxInt != 16 || w != x5_1.Args[0] { break @@ -10769,9 +10820,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } _ = x4.Args[2] - if p != x4.Args[0] { - break - } + p4 := 
x4.Args[0] x4_1 := x4.Args[1] if x4_1.Op != OpAMD64SHRQconst || x4_1.AuxInt != 24 || w != x4_1.Args[0] { break @@ -10781,9 +10830,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } _ = x3.Args[2] - if p != x3.Args[0] { - break - } + p3 := x3.Args[0] x3_1 := x3.Args[1] if x3_1.Op != OpAMD64SHRQconst || x3_1.AuxInt != 32 || w != x3_1.Args[0] { break @@ -10793,9 +10840,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } _ = x2.Args[2] - if p != x2.Args[0] { - break - } + p2 := x2.Args[0] x2_1 := x2.Args[1] if x2_1.Op != OpAMD64SHRQconst || x2_1.AuxInt != 40 || w != x2_1.Args[0] { break @@ -10805,9 +10850,7 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } _ = x1.Args[2] - if p != x1.Args[0] { - break - } + p1 := x1.Args[0] x1_1 := x1.Args[1] if x1_1.Op != OpAMD64SHRQconst || x1_1.AuxInt != 48 || w != x1_1.Args[0] { break @@ -10817,11 +10860,9 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } mem := x0.Args[2] - if p != x0.Args[0] { - break - } + p0 := x0.Args[0] x0_1 := x0.Args[1] - if x0_1.Op != OpAMD64SHRQconst || x0_1.AuxInt != 56 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6)) { + if x0_1.Op != OpAMD64SHRQconst || x0_1.AuxInt != 56 || w != x0_1.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && same(p0, p1, 1) && same(p1, p2, 1) && same(p2, p3, 1) && same(p3, p4, 1) && same(p4, p5, 1) && same(p5, p6, 1) && same(p6, p7, 1) && clobber(x0, x1, x2, x3, x4, x5, x6)) { break } v.reset(OpAMD64MOVQstore) @@ -10829,16 +10870,16 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { v.Aux = s v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPQ, w.Type) v0.AddArg(w) - v.AddArg3(p, v0, mem) + v.AddArg3(p0, v0, mem) return true } - // match: (MOVBstore [i] {s} p (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstore [i-1] {s} p w mem) + // match: (MOVBstore [i] {s} p1 (SHRWconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i-1] {s} p0 w mem) for { i := v.AuxInt s := v.Aux - p := v_0 + p1 := v_0 if v_1.Op != OpAMD64SHRWconst || v_1.AuxInt != 8 { break } @@ -10848,22 +10889,23 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } mem := x.Args[2] - if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + p0 := x.Args[0] + if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) v.AuxInt = i - 1 v.Aux = s - v.AddArg3(p, w, mem) + v.AddArg3(p0, w, mem) return true } - // match: (MOVBstore [i] {s} p (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstore [i-1] {s} p w mem) + // match: (MOVBstore [i] {s} p1 (SHRLconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i-1] {s} p0 w mem) for { i := v.AuxInt s := v.Aux - p := v_0 + p1 := v_0 if v_1.Op != OpAMD64SHRLconst || v_1.AuxInt != 8 { break } @@ -10873,22 +10915,23 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } mem := x.Args[2] - if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + p0 := x.Args[0] + if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) v.AuxInt = i - 1 
v.Aux = s - v.AddArg3(p, w, mem) + v.AddArg3(p0, w, mem) return true } - // match: (MOVBstore [i] {s} p (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstore [i-1] {s} p w mem) + // match: (MOVBstore [i] {s} p1 (SHRQconst [8] w) x:(MOVBstore [i-1] {s} p0 w mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i-1] {s} p0 w mem) for { i := v.AuxInt s := v.Aux - p := v_0 + p1 := v_0 if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 8 { break } @@ -10898,100 +10941,95 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } mem := x.Args[2] - if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + p0 := x.Args[0] + if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) v.AuxInt = i - 1 v.Aux = s - v.AddArg3(p, w, mem) + v.AddArg3(p0, w, mem) return true } - // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRWconst [8] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstore [i] {s} p w mem) + // match: (MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHRWconst [8] w) mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w mem) for { i := v.AuxInt s := v.Aux - p := v_0 + p1 := v_0 w := v_1 x := v_2 if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s { break } mem := x.Args[2] - if p != x.Args[0] { - break - } + p0 := x.Args[0] x_1 := x.Args[1] - if x_1.Op != OpAMD64SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) { + if x_1.Op != OpAMD64SHRWconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) v.AuxInt = i v.Aux = s - v.AddArg3(p, w, mem) + v.AddArg3(p0, w, mem) return true } - // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRLconst [8] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstore [i] {s} p w mem) + // match: (MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHRLconst [8] w) mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w mem) for { i := v.AuxInt s := v.Aux - p := v_0 + p1 := v_0 w := v_1 x := v_2 if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s { break } mem := x.Args[2] - if p != x.Args[0] { - break - } + p0 := x.Args[0] x_1 := x.Args[1] - if x_1.Op != OpAMD64SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) { + if x_1.Op != OpAMD64SHRLconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) v.AuxInt = i v.Aux = s - v.AddArg3(p, w, mem) + v.AddArg3(p0, w, mem) return true } - // match: (MOVBstore [i] {s} p w x:(MOVBstore [i+1] {s} p (SHRQconst [8] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstore [i] {s} p w mem) + // match: (MOVBstore [i] {s} p1 w x:(MOVBstore [i+1] {s} p0 (SHRQconst [8] w) mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i] {s} p0 w mem) for { i := v.AuxInt s := v.Aux - p := v_0 + p1 := v_0 w := v_1 x := v_2 if x.Op != OpAMD64MOVBstore || x.AuxInt != i+1 || x.Aux != s { break } mem := x.Args[2] - if p != x.Args[0] { - break - } + p0 := x.Args[0] x_1 := x.Args[1] - if x_1.Op != OpAMD64SHRQconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && clobber(x)) { + if x_1.Op != OpAMD64SHRQconst || x_1.AuxInt != 8 || w != x_1.Args[0] || !(x.Uses == 1 && same(p0, 
p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) v.AuxInt = i v.Aux = s - v.AddArg3(p, w, mem) + v.AddArg3(p0, w, mem) return true } - // match: (MOVBstore [i] {s} p (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRLconst [j-8] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstore [i-1] {s} p w0 mem) + // match: (MOVBstore [i] {s} p1 (SHRLconst [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHRLconst [j-8] w) mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i-1] {s} p0 w0 mem) for { i := v.AuxInt s := v.Aux - p := v_0 + p1 := v_0 if v_1.Op != OpAMD64SHRLconst { break } @@ -11002,26 +11040,24 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } mem := x.Args[2] - if p != x.Args[0] { - break - } + p0 := x.Args[0] w0 := x.Args[1] - if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) v.AuxInt = i - 1 v.Aux = s - v.AddArg3(p, w0, mem) + v.AddArg3(p0, w0, mem) return true } - // match: (MOVBstore [i] {s} p (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p w0:(SHRQconst [j-8] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstore [i-1] {s} p w0 mem) + // match: (MOVBstore [i] {s} p1 (SHRQconst [j] w) x:(MOVBstore [i-1] {s} p0 w0:(SHRQconst [j-8] w) mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // result: (MOVWstore [i-1] {s} p0 w0 mem) for { i := v.AuxInt s := v.Aux - p := v_0 + p1 := v_0 if v_1.Op != OpAMD64SHRQconst { break } @@ -11032,17 +11068,15 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value) bool { break } mem := x.Args[2] - if p != x.Args[0] { - break - } + p0 := x.Args[0] w0 := x.Args[1] - if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVWstore) v.AuxInt = i - 1 v.Aux = s - v.AddArg3(p, w0, mem) + v.AddArg3(p0, w0, mem) return true } // match: (MOVBstore [i] {s} p x1:(MOVBload [j] {s2} p2 mem) mem2:(MOVBstore [i-1] {s} p x2:(MOVBload [j-1] {s2} p2 mem) mem)) @@ -11179,53 +11213,13 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool { v.AddArg2(ptr, mem) return true } - // match: (MOVBstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem) - // cond: canMergeSym(sym1, sym2) - // result: (MOVBstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) - for { - x := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ1 { - break - } - off := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVBstoreconstidx1) - v.AuxInt = ValAndOff(x).add(off) - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVBstoreconst [x] {sym} (ADDQ ptr idx) mem) - // result: (MOVBstoreconstidx1 [x] {sym} ptr idx mem) - for { - x := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQ { - break - } - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - v.reset(OpAMD64MOVBstoreconstidx1) - v.AuxInt = x - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) - // result: (MOVWstoreconst 
[makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) + // match: (MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) + // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) for { c := v.AuxInt s := v.Aux - p := v_0 + p1 := v_0 x := v_1 if x.Op != OpAMD64MOVBstoreconst { break @@ -11235,22 +11229,23 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool { break } mem := x.Args[1] - if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { + p0 := x.Args[0] + if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { break } v.reset(OpAMD64MOVWstoreconst) v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off()) v.Aux = s - v.AddArg2(p, mem) + v.AddArg2(p0, mem) return true } - // match: (MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) - // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p mem) + // match: (MOVBstoreconst [a] {s} p1 x:(MOVBstoreconst [c] {s} p0 mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) + // result: (MOVWstoreconst [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p0 mem) for { a := v.AuxInt s := v.Aux - p := v_0 + p1 := v_0 x := v_1 if x.Op != OpAMD64MOVBstoreconst { break @@ -11260,13 +11255,14 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool { break } mem := x.Args[1] - if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { + p0 := x.Args[0] + if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { break } v.reset(OpAMD64MOVWstoreconst) v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off()) v.Aux = s - v.AddArg2(p, mem) + v.AddArg2(p0, mem) return true } // match: (MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) @@ -11314,704 +11310,96 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64MOVBstoreconstidx1(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64MOVLQSX(v *Value) bool { v_0 := v.Args[0] - // match: (MOVBstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem) - // cond: ValAndOff(x).canAdd(c) - // result: (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) + b := v.Block + // match: (MOVLQSX x:(MOVLload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVLQSXload [off] {sym} ptr mem) for { - x := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64ADDQconst { - continue - } - c := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(ValAndOff(x).canAdd(c)) { - continue - } - v.reset(OpAMD64MOVBstoreconstidx1) - v.AuxInt = ValAndOff(x).add(c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true + x := v_0 + if x.Op != OpAMD64MOVLload { + break } - break + off := x.AuxInt + sym := x.Aux + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { + break + } + b = x.Block + v0 := b.NewValue0(x.Pos, 
OpAMD64MOVLQSXload, v.Type) + v.copyOf(v0) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg2(ptr, mem) + return true } - // match: (MOVBstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem) - // cond: ValAndOff(x).canAdd(c) - // result: (MOVBstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) + // match: (MOVLQSX x:(MOVQload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVLQSXload [off] {sym} ptr mem) for { - x := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - continue - } - c := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(ValAndOff(x).canAdd(c)) { - continue - } - v.reset(OpAMD64MOVBstoreconstidx1) - v.AuxInt = ValAndOff(x).add(c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true + x := v_0 + if x.Op != OpAMD64MOVQload { + break } - break + off := x.AuxInt + sym := x.Aux + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { + break + } + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVLQSXload, v.Type) + v.copyOf(v0) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg2(ptr, mem) + return true } - // match: (MOVBstoreconstidx1 [c] {s} p i x:(MOVBstoreconstidx1 [a] {s} p i mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 1 == ValAndOff(c).Off() && clobber(x) - // result: (MOVWstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xff | ValAndOff(c).Val()<<8, ValAndOff(a).Off())] {s} p i mem) + // match: (MOVLQSX (ANDLconst [c] x)) + // cond: c & 0x80000000 == 0 + // result: (ANDLconst [c & 0x7fffffff] x) for { - c := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - i := v_1 - x := v_2 - if x.Op != OpAMD64MOVBstoreconstidx1 { - continue - } - a := x.AuxInt - if x.Aux != s { - continue - } - mem := x.Args[2] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || i != x_1 || !(x.Uses == 1 && ValAndOff(a).Off()+1 == ValAndOff(c).Off() && clobber(x)) { - continue - } - v.reset(OpAMD64MOVWstoreconstidx1) - v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xff|ValAndOff(c).Val()<<8, ValAndOff(a).Off()) - v.Aux = s - v.AddArg3(p, i, mem) - return true - } + if v_0.Op != OpAMD64ANDLconst { + break } - break + c := v_0.AuxInt + x := v_0.Args[0] + if !(c&0x80000000 == 0) { + break + } + v.reset(OpAMD64ANDLconst) + v.AuxInt = c & 0x7fffffff + v.AddArg(x) + return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVBstoreidx1(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MOVBstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem) - // cond: is32Bit(c+d) - // result: (MOVBstoreidx1 [c+d] {sym} ptr idx val mem) + // match: (MOVLQSX (MOVLQSX x)) + // result: (MOVLQSX x) for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64ADDQconst { - continue - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { - continue - } - v.reset(OpAMD64MOVBstoreidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true + if v_0.Op != OpAMD64MOVLQSX { + break } - break + x := v_0.Args[0] + v.reset(OpAMD64MOVLQSX) + v.AddArg(x) + return true } - // match: (MOVBstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem) - // cond: is32Bit(c+d) - // result: (MOVBstoreidx1 [c+d] {sym} ptr idx val mem) + // match: (MOVLQSX (MOVWQSX x)) + // result: (MOVWQSX x) for { - c := 
v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - continue - } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { - continue - } - v.reset(OpAMD64MOVBstoreidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true + if v_0.Op != OpAMD64MOVWQSX { + break } - break + x := v_0.Args[0] + v.reset(OpAMD64MOVWQSX) + v.AddArg(x) + return true } - // match: (MOVBstoreidx1 [i] {s} p idx w x0:(MOVBstoreidx1 [i-1] {s} p idx (SHRWconst [8] w) mem)) - // cond: x0.Uses == 1 && clobber(x0) - // result: (MOVWstoreidx1 [i-1] {s} p idx (ROLWconst [8] w) mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - w := v_2 - x0 := v_3 - if x0.Op != OpAMD64MOVBstoreidx1 || x0.AuxInt != i-1 || x0.Aux != s { - continue - } - mem := x0.Args[3] - x0_0 := x0.Args[0] - x0_1 := x0.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 { - if p != x0_0 || idx != x0_1 { - continue - } - x0_2 := x0.Args[2] - if x0_2.Op != OpAMD64SHRWconst || x0_2.AuxInt != 8 || w != x0_2.Args[0] || !(x0.Uses == 1 && clobber(x0)) { - continue - } - v.reset(OpAMD64MOVWstoreidx1) - v.AuxInt = i - 1 - v.Aux = s - v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, w.Type) - v0.AuxInt = 8 - v0.AddArg(w) - v.AddArg4(p, idx, v0, mem) - return true - } - } - break - } - // match: (MOVBstoreidx1 [i] {s} p idx w x2:(MOVBstoreidx1 [i-1] {s} p idx (SHRLconst [8] w) x1:(MOVBstoreidx1 [i-2] {s} p idx (SHRLconst [16] w) x0:(MOVBstoreidx1 [i-3] {s} p idx (SHRLconst [24] w) mem)))) - // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2) - // result: (MOVLstoreidx1 [i-3] {s} p idx (BSWAPL w) mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - w := v_2 - x2 := v_3 - if x2.Op != OpAMD64MOVBstoreidx1 || x2.AuxInt != i-1 || x2.Aux != s { - continue - } - _ = x2.Args[3] - x2_0 := x2.Args[0] - x2_1 := x2.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x2_0, x2_1 = _i1+1, x2_1, x2_0 { - if p != x2_0 || idx != x2_1 { - continue - } - x2_2 := x2.Args[2] - if x2_2.Op != OpAMD64SHRLconst || x2_2.AuxInt != 8 || w != x2_2.Args[0] { - continue - } - x1 := x2.Args[3] - if x1.Op != OpAMD64MOVBstoreidx1 || x1.AuxInt != i-2 || x1.Aux != s { - continue - } - _ = x1.Args[3] - x1_0 := x1.Args[0] - x1_1 := x1.Args[1] - for _i2 := 0; _i2 <= 1; _i2, x1_0, x1_1 = _i2+1, x1_1, x1_0 { - if p != x1_0 || idx != x1_1 { - continue - } - x1_2 := x1.Args[2] - if x1_2.Op != OpAMD64SHRLconst || x1_2.AuxInt != 16 || w != x1_2.Args[0] { - continue - } - x0 := x1.Args[3] - if x0.Op != OpAMD64MOVBstoreidx1 || x0.AuxInt != i-3 || x0.Aux != s { - continue - } - mem := x0.Args[3] - x0_0 := x0.Args[0] - x0_1 := x0.Args[1] - for _i3 := 0; _i3 <= 1; _i3, x0_0, x0_1 = _i3+1, x0_1, x0_0 { - if p != x0_0 || idx != x0_1 { - continue - } - x0_2 := x0.Args[2] - if x0_2.Op != OpAMD64SHRLconst || x0_2.AuxInt != 24 || w != x0_2.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && clobber(x0, x1, x2)) { - continue - } - v.reset(OpAMD64MOVLstoreidx1) - v.AuxInt = i - 3 - v.Aux = s - v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, w.Type) - v0.AddArg(w) - v.AddArg4(p, idx, v0, mem) - return true - } - } - } - } - break - } - // match: (MOVBstoreidx1 [i] {s} p idx w x6:(MOVBstoreidx1 [i-1] {s} p idx (SHRQconst [8] w) x5:(MOVBstoreidx1 [i-2] {s} p idx (SHRQconst [16] w) x4:(MOVBstoreidx1 [i-3] {s} p idx 
(SHRQconst [24] w) x3:(MOVBstoreidx1 [i-4] {s} p idx (SHRQconst [32] w) x2:(MOVBstoreidx1 [i-5] {s} p idx (SHRQconst [40] w) x1:(MOVBstoreidx1 [i-6] {s} p idx (SHRQconst [48] w) x0:(MOVBstoreidx1 [i-7] {s} p idx (SHRQconst [56] w) mem)))))))) - // cond: x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6) - // result: (MOVQstoreidx1 [i-7] {s} p idx (BSWAPQ w) mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - w := v_2 - x6 := v_3 - if x6.Op != OpAMD64MOVBstoreidx1 || x6.AuxInt != i-1 || x6.Aux != s { - continue - } - _ = x6.Args[3] - x6_0 := x6.Args[0] - x6_1 := x6.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x6_0, x6_1 = _i1+1, x6_1, x6_0 { - if p != x6_0 || idx != x6_1 { - continue - } - x6_2 := x6.Args[2] - if x6_2.Op != OpAMD64SHRQconst || x6_2.AuxInt != 8 || w != x6_2.Args[0] { - continue - } - x5 := x6.Args[3] - if x5.Op != OpAMD64MOVBstoreidx1 || x5.AuxInt != i-2 || x5.Aux != s { - continue - } - _ = x5.Args[3] - x5_0 := x5.Args[0] - x5_1 := x5.Args[1] - for _i2 := 0; _i2 <= 1; _i2, x5_0, x5_1 = _i2+1, x5_1, x5_0 { - if p != x5_0 || idx != x5_1 { - continue - } - x5_2 := x5.Args[2] - if x5_2.Op != OpAMD64SHRQconst || x5_2.AuxInt != 16 || w != x5_2.Args[0] { - continue - } - x4 := x5.Args[3] - if x4.Op != OpAMD64MOVBstoreidx1 || x4.AuxInt != i-3 || x4.Aux != s { - continue - } - _ = x4.Args[3] - x4_0 := x4.Args[0] - x4_1 := x4.Args[1] - for _i3 := 0; _i3 <= 1; _i3, x4_0, x4_1 = _i3+1, x4_1, x4_0 { - if p != x4_0 || idx != x4_1 { - continue - } - x4_2 := x4.Args[2] - if x4_2.Op != OpAMD64SHRQconst || x4_2.AuxInt != 24 || w != x4_2.Args[0] { - continue - } - x3 := x4.Args[3] - if x3.Op != OpAMD64MOVBstoreidx1 || x3.AuxInt != i-4 || x3.Aux != s { - continue - } - _ = x3.Args[3] - x3_0 := x3.Args[0] - x3_1 := x3.Args[1] - for _i4 := 0; _i4 <= 1; _i4, x3_0, x3_1 = _i4+1, x3_1, x3_0 { - if p != x3_0 || idx != x3_1 { - continue - } - x3_2 := x3.Args[2] - if x3_2.Op != OpAMD64SHRQconst || x3_2.AuxInt != 32 || w != x3_2.Args[0] { - continue - } - x2 := x3.Args[3] - if x2.Op != OpAMD64MOVBstoreidx1 || x2.AuxInt != i-5 || x2.Aux != s { - continue - } - _ = x2.Args[3] - x2_0 := x2.Args[0] - x2_1 := x2.Args[1] - for _i5 := 0; _i5 <= 1; _i5, x2_0, x2_1 = _i5+1, x2_1, x2_0 { - if p != x2_0 || idx != x2_1 { - continue - } - x2_2 := x2.Args[2] - if x2_2.Op != OpAMD64SHRQconst || x2_2.AuxInt != 40 || w != x2_2.Args[0] { - continue - } - x1 := x2.Args[3] - if x1.Op != OpAMD64MOVBstoreidx1 || x1.AuxInt != i-6 || x1.Aux != s { - continue - } - _ = x1.Args[3] - x1_0 := x1.Args[0] - x1_1 := x1.Args[1] - for _i6 := 0; _i6 <= 1; _i6, x1_0, x1_1 = _i6+1, x1_1, x1_0 { - if p != x1_0 || idx != x1_1 { - continue - } - x1_2 := x1.Args[2] - if x1_2.Op != OpAMD64SHRQconst || x1_2.AuxInt != 48 || w != x1_2.Args[0] { - continue - } - x0 := x1.Args[3] - if x0.Op != OpAMD64MOVBstoreidx1 || x0.AuxInt != i-7 || x0.Aux != s { - continue - } - mem := x0.Args[3] - x0_0 := x0.Args[0] - x0_1 := x0.Args[1] - for _i7 := 0; _i7 <= 1; _i7, x0_0, x0_1 = _i7+1, x0_1, x0_0 { - if p != x0_0 || idx != x0_1 { - continue - } - x0_2 := x0.Args[2] - if x0_2.Op != OpAMD64SHRQconst || x0_2.AuxInt != 56 || w != x0_2.Args[0] || !(x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && clobber(x0, x1, x2, x3, x4, x5, x6)) { - continue - } - v.reset(OpAMD64MOVQstoreidx1) - v.AuxInt = i - 7 - v.Aux = s - v0 := 
b.NewValue0(v.Pos, OpAMD64BSWAPQ, w.Type) - v0.AddArg(w) - v.AddArg4(p, idx, v0, mem) - return true - } - } - } - } - } - } - } - } - break - } - // match: (MOVBstoreidx1 [i] {s} p idx (SHRWconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstoreidx1 [i-1] {s} p idx w mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - if v_2.Op != OpAMD64SHRWconst || v_2.AuxInt != 8 { - continue - } - w := v_2.Args[0] - x := v_3 - if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(OpAMD64MOVWstoreidx1) - v.AuxInt = i - 1 - v.Aux = s - v.AddArg4(p, idx, w, mem) - return true - } - } - break - } - // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstoreidx1 [i-1] {s} p idx w mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - if v_2.Op != OpAMD64SHRLconst || v_2.AuxInt != 8 { - continue - } - w := v_2.Args[0] - x := v_3 - if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(OpAMD64MOVWstoreidx1) - v.AuxInt = i - 1 - v.Aux = s - v.AddArg4(p, idx, w, mem) - return true - } - } - break - } - // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [8] w) x:(MOVBstoreidx1 [i-1] {s} p idx w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstoreidx1 [i-1] {s} p idx w mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - if v_2.Op != OpAMD64SHRQconst || v_2.AuxInt != 8 { - continue - } - w := v_2.Args[0] - x := v_3 - if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(OpAMD64MOVWstoreidx1) - v.AuxInt = i - 1 - v.Aux = s - v.AddArg4(p, idx, w, mem) - return true - } - } - break - } - // match: (MOVBstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRLconst [j-8] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - if v_2.Op != OpAMD64SHRLconst { - continue - } - j := v_2.AuxInt - w := v_2.Args[0] - x := v_3 - if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 { - continue - } - w0 := x.Args[2] - if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(OpAMD64MOVWstoreidx1) - v.AuxInt = i - 1 - v.Aux = s - v.AddArg4(p, idx, w0, mem) - return true - } - } - 
break - } - // match: (MOVBstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVBstoreidx1 [i-1] {s} p idx w0:(SHRQconst [j-8] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVWstoreidx1 [i-1] {s} p idx w0 mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - if v_2.Op != OpAMD64SHRQconst { - continue - } - j := v_2.AuxInt - w := v_2.Args[0] - x := v_3 - if x.Op != OpAMD64MOVBstoreidx1 || x.AuxInt != i-1 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 { - continue - } - w0 := x.Args[2] - if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-8 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(OpAMD64MOVWstoreidx1) - v.AuxInt = i - 1 - v.Aux = s - v.AddArg4(p, idx, w0, mem) - return true - } - } - break - } - // match: (MOVBstoreidx1 [i] {s} p (MOVQconst [c]) w mem) - // cond: is32Bit(i+c) - // result: (MOVBstore [i+c] {s} p w mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - if v_1.Op != OpAMD64MOVQconst { - continue - } - c := v_1.AuxInt - w := v_2 - mem := v_3 - if !(is32Bit(i + c)) { - continue - } - v.reset(OpAMD64MOVBstore) - v.AuxInt = i + c - v.Aux = s - v.AddArg3(p, w, mem) - return true - } - break - } - // match: (MOVBstoreidx1 [off] {s} ptr idx (MOVLconst [c]) mem) - // cond: validValAndOff(int64(int8(c)), off) - // result: (MOVBstoreconstidx1 [makeValAndOff(int64(int8(c)), off)] {s} ptr idx mem) - for { - off := v.AuxInt - s := v.Aux - ptr := v_0 - idx := v_1 - if v_2.Op != OpAMD64MOVLconst { - break - } - c := v_2.AuxInt - mem := v_3 - if !(validValAndOff(int64(int8(c)), off)) { - break - } - v.reset(OpAMD64MOVBstoreconstidx1) - v.AuxInt = makeValAndOff(int64(int8(c)), off) - v.Aux = s - v.AddArg3(ptr, idx, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVLQSX(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (MOVLQSX x:(MOVLload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVLQSXload [off] {sym} ptr mem) - for { - x := v_0 - if x.Op != OpAMD64MOVLload { - break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVLQSXload, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg2(ptr, mem) - return true - } - // match: (MOVLQSX x:(MOVQload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVLQSXload [off] {sym} ptr mem) - for { - x := v_0 - if x.Op != OpAMD64MOVQload { - break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVLQSXload, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg2(ptr, mem) - return true - } - // match: (MOVLQSX (ANDLconst [c] x)) - // cond: c & 0x80000000 == 0 - // result: (ANDLconst [c & 0x7fffffff] x) - for { - if v_0.Op != OpAMD64ANDLconst { - break - } - c := v_0.AuxInt - x := v_0.Args[0] - if !(c&0x80000000 == 0) { - break - } - v.reset(OpAMD64ANDLconst) - v.AuxInt = c & 0x7fffffff - v.AddArg(x) - return true - } - // match: (MOVLQSX (MOVLQSX x)) - // result: (MOVLQSX x) - for { - if v_0.Op != OpAMD64MOVLQSX { - break - } - x := v_0.Args[0] - v.reset(OpAMD64MOVLQSX) - v.AddArg(x) - 
return true - } - // match: (MOVLQSX (MOVWQSX x)) - // result: (MOVWQSX x) - for { - if v_0.Op != OpAMD64MOVWQSX { - break - } - x := v_0.Args[0] - v.reset(OpAMD64MOVWQSX) - v.AddArg(x) - return true - } - // match: (MOVLQSX (MOVBQSX x)) - // result: (MOVBQSX x) + // match: (MOVLQSX (MOVBQSX x)) + // result: (MOVBQSX x) for { if v_0.Op != OpAMD64MOVBQSX { break @@ -12131,58 +11519,10 @@ func rewriteValueAMD64_OpAMD64MOVLQZX(v *Value) bool { v.copyOf(x) return true } - // match: (MOVLQZX x:(MOVLloadidx1 [off] {sym} ptr idx mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVLloadidx1 [off] {sym} ptr idx mem) + // match: (MOVLQZX (ANDLconst [c] x)) + // result: (ANDLconst [c] x) for { - x := v_0 - if x.Op != OpAMD64MOVLloadidx1 { - break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[2] - ptr := x.Args[0] - idx := x.Args[1] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVLQZX x:(MOVLloadidx4 [off] {sym} ptr idx mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVLloadidx4 [off] {sym} ptr idx mem) - for { - x := v_0 - if x.Op != OpAMD64MOVLloadidx4 { - break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[2] - ptr := x.Args[0] - idx := x.Args[1] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx4, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVLQZX (ANDLconst [c] x)) - // result: (ANDLconst [c] x) - for { - if v_0.Op != OpAMD64ANDLconst { + if v_0.Op != OpAMD64ANDLconst { break } c := v_0.AuxInt @@ -12396,101 +11736,117 @@ func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool { v.AddArg2(base, mem) return true } - // match: (MOVLload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVLloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) + // match: (MOVLload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVLload [i0+i1] {s0} (LEAQ1 [0] {s1} x y) mem) for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ1 { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ1 { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + if !(i1 != 0 && is32Bit(i0+i1)) { break } - v.reset(OpAMD64MOVLloadidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) + v.reset(OpAMD64MOVLload) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg2(v0, mem) return true } - // match: (MOVLload [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVLloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) + // match: (MOVLload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVLload [i0+i1] {s0} (LEAQ2 [0] {s1} x y) mem) for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ4 { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ2 { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := 
v_0.Args[0] + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + if !(i1 != 0 && is32Bit(i0+i1)) { break } - v.reset(OpAMD64MOVLloadidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) + v.reset(OpAMD64MOVLload) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg2(v0, mem) return true } - // match: (MOVLload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVLloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) + // match: (MOVLload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVLload [i0+i1] {s0} (LEAQ4 [0] {s1} x y) mem) for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ8 { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ4 { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + if !(i1 != 0 && is32Bit(i0+i1)) { break } - v.reset(OpAMD64MOVLloadidx8) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) + v.reset(OpAMD64MOVLload) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg2(v0, mem) return true } - // match: (MOVLload [off] {sym} (ADDQ ptr idx) mem) - // cond: ptr.Op != OpSB - // result: (MOVLloadidx1 [off] {sym} ptr idx mem) + // match: (MOVLload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVLload [i0+i1] {s0} (LEAQ8 [0] {s1} x y) mem) for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQ { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ8 { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - mem := v_1 - if !(ptr.Op != OpSB) { - continue - } - v.reset(OpAMD64MOVLloadidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + mem := v_1 + if !(i1 != 0 && is32Bit(i0+i1)) { + break } - break + v.reset(OpAMD64MOVLload) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg2(v0, mem) + return true } // match: (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem) // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) @@ -12567,375 +11923,113 @@ func rewriteValueAMD64_OpAMD64MOVLload(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64MOVLloadidx1(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (MOVLloadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) - // result: (MOVLloadidx4 [c] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 2 { - continue - } - idx := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVLloadidx4) - v.AuxInt = c - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVLloadidx1 [c] {sym} ptr (SHLQconst [3] idx) 
mem) - // result: (MOVLloadidx8 [c] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 { - continue - } - idx := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVLloadidx8) - v.AuxInt = c - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVLloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem) - // cond: is32Bit(c+d) - // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem) + b := v.Block + typ := &b.Func.Config.Types + // match: (MOVLstore [off] {sym} ptr (MOVLQSX x) mem) + // result: (MOVLstore [off] {sym} ptr x mem) for { - c := v.AuxInt + off := v.AuxInt sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64ADDQconst { - continue - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(is32Bit(c + d)) { - continue - } - v.reset(OpAMD64MOVLloadidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true + ptr := v_0 + if v_1.Op != OpAMD64MOVLQSX { + break } - break + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64MOVLstore) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, x, mem) + return true } - // match: (MOVLloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem) - // cond: is32Bit(c+d) - // result: (MOVLloadidx1 [c+d] {sym} ptr idx mem) + // match: (MOVLstore [off] {sym} ptr (MOVLQZX x) mem) + // result: (MOVLstore [off] {sym} ptr x mem) for { - c := v.AuxInt + off := v.AuxInt sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - continue - } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(is32Bit(c + d)) { - continue - } - v.reset(OpAMD64MOVLloadidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVLloadidx1 [i] {s} p (MOVQconst [c]) mem) - // cond: is32Bit(i+c) - // result: (MOVLload [i+c] {s} p mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - if v_1.Op != OpAMD64MOVQconst { - continue - } - c := v_1.AuxInt - mem := v_2 - if !(is32Bit(i + c)) { - continue - } - v.reset(OpAMD64MOVLload) - v.AuxInt = i + c - v.Aux = s - v.AddArg2(p, mem) - return true + ptr := v_0 + if v_1.Op != OpAMD64MOVLQZX { + break } - break + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64MOVLstore) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, x, mem) + return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVLloadidx4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVLloadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem) - // cond: is32Bit(c+d) - // result: (MOVLloadidx4 [c+d] {sym} ptr idx mem) + // match: (MOVLstore [off1] {sym} (ADDQconst [off2] ptr) val mem) + // cond: is32Bit(off1+off2) + // result: (MOVLstore [off1+off2] {sym} ptr val mem) for { - c := v.AuxInt + off1 := v.AuxInt sym := v.Aux if v_0.Op != OpAMD64ADDQconst { break } - d := v_0.AuxInt + off2 := v_0.AuxInt ptr := v_0.Args[0] - idx := v_1 + val := v_1 mem := v_2 - if !(is32Bit(c + d)) { + if !(is32Bit(off1 + off2)) { break } - v.reset(OpAMD64MOVLloadidx4) - v.AuxInt = c + d + v.reset(OpAMD64MOVLstore) + v.AuxInt = off1 + off2 v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.AddArg3(ptr, val, mem) return true } - // match: (MOVLloadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem) - // cond: is32Bit(c+4*d) - // result: (MOVLloadidx4 [c+4*d] {sym} ptr idx mem) + // match: (MOVLstore [off] 
{sym} ptr (MOVLconst [c]) mem) + // cond: validOff(off) + // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem) for { - c := v.AuxInt + off := v.AuxInt sym := v.Aux ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { + if v_1.Op != OpAMD64MOVLconst { break } - d := v_1.AuxInt - idx := v_1.Args[0] + c := v_1.AuxInt mem := v_2 - if !(is32Bit(c + 4*d)) { + if !(validOff(off)) { break } - v.reset(OpAMD64MOVLloadidx4) - v.AuxInt = c + 4*d + v.reset(OpAMD64MOVLstoreconst) + v.AuxInt = makeValAndOff(int64(int32(c)), off) v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.AddArg2(ptr, mem) return true } - // match: (MOVLloadidx4 [i] {s} p (MOVQconst [c]) mem) - // cond: is32Bit(i+4*c) - // result: (MOVLload [i+4*c] {s} p mem) + // match: (MOVLstore [off] {sym} ptr (MOVQconst [c]) mem) + // cond: validOff(off) + // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem) for { - i := v.AuxInt - s := v.Aux - p := v_0 + off := v.AuxInt + sym := v.Aux + ptr := v_0 if v_1.Op != OpAMD64MOVQconst { break } c := v_1.AuxInt mem := v_2 - if !(is32Bit(i + 4*c)) { + if !(validOff(off)) { break } - v.reset(OpAMD64MOVLload) - v.AuxInt = i + 4*c - v.Aux = s - v.AddArg2(p, mem) + v.reset(OpAMD64MOVLstoreconst) + v.AuxInt = makeValAndOff(int64(int32(c)), off) + v.Aux = sym + v.AddArg2(ptr, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVLloadidx8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVLloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem) - // cond: is32Bit(c+d) - // result: (MOVLloadidx8 [c+d] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(is32Bit(c + d)) { - break - } - v.reset(OpAMD64MOVLloadidx8) - v.AuxInt = c + d - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVLloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem) - // cond: is32Bit(c+8*d) - // result: (MOVLloadidx8 [c+8*d] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(is32Bit(c + 8*d)) { - break - } - v.reset(OpAMD64MOVLloadidx8) - v.AuxInt = c + 8*d - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVLloadidx8 [i] {s} p (MOVQconst [c]) mem) - // cond: is32Bit(i+8*c) - // result: (MOVLload [i+8*c] {s} p mem) - for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := v_1.AuxInt - mem := v_2 - if !(is32Bit(i + 8*c)) { - break - } - v.reset(OpAMD64MOVLload) - v.AuxInt = i + 8*c - v.Aux = s - v.AddArg2(p, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MOVLstore [off] {sym} ptr (MOVLQSX x) mem) - // result: (MOVLstore [off] {sym} ptr x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64MOVLQSX { - break - } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVLstore) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, x, mem) - return true - } - // match: (MOVLstore [off] {sym} ptr (MOVLQZX x) mem) - // result: (MOVLstore [off] {sym} ptr x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64MOVLQZX { - break - } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVLstore) - v.AuxInt = off - v.Aux = sym - 
v.AddArg3(ptr, x, mem) - return true - } - // match: (MOVLstore [off1] {sym} (ADDQconst [off2] ptr) val mem) - // cond: is32Bit(off1+off2) - // result: (MOVLstore [off1+off2] {sym} ptr val mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { - break - } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(OpAMD64MOVLstore) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg3(ptr, val, mem) - return true - } - // match: (MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) - // cond: validOff(off) - // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64MOVLconst { - break - } - c := v_1.AuxInt - mem := v_2 - if !(validOff(off)) { - break - } - v.reset(OpAMD64MOVLstoreconst) - v.AuxInt = makeValAndOff(int64(int32(c)), off) - v.Aux = sym - v.AddArg2(ptr, mem) - return true - } - // match: (MOVLstore [off] {sym} ptr (MOVQconst [c]) mem) - // cond: validOff(off) - // result: (MOVLstoreconst [makeValAndOff(int64(int32(c)),off)] {sym} ptr mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := v_1.AuxInt - mem := v_2 - if !(validOff(off)) { - break - } - v.reset(OpAMD64MOVLstoreconst) - v.AuxInt = makeValAndOff(int64(int32(c)), off) - v.Aux = sym - v.AddArg2(ptr, mem) - return true - } - // match: (MOVLstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // match: (MOVLstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { off1 := v.AuxInt sym1 := v.Aux @@ -12956,113 +12050,129 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { v.AddArg3(base, val, mem) return true } - // match: (MOVLstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVLstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) + // match: (MOVLstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVLstore [i0+i1] {s0} (LEAQ1 [0] {s1} x y) val mem) for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ1 { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ1 { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] val := v_1 mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + if !(i1 != 0 && is32Bit(i0+i1)) { break } - v.reset(OpAMD64MOVLstoreidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) + v.reset(OpAMD64MOVLstore) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg3(v0, val, mem) return true } - // match: (MOVLstore [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVLstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) + // match: (MOVLstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVLstore [i0+i1] {s0} (LEAQ2 [0] {s1} x y) val mem) for { - off1 := v.AuxInt - sym1 := v.Aux - if 
v_0.Op != OpAMD64LEAQ4 { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ2 { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] val := v_1 mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + if !(i1 != 0 && is32Bit(i0+i1)) { break } - v.reset(OpAMD64MOVLstoreidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) + v.reset(OpAMD64MOVLstore) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg3(v0, val, mem) return true } - // match: (MOVLstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVLstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) + // match: (MOVLstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVLstore [i0+i1] {s0} (LEAQ4 [0] {s1} x y) val mem) for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ8 { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ4 { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] val := v_1 mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + if !(i1 != 0 && is32Bit(i0+i1)) { break } - v.reset(OpAMD64MOVLstoreidx8) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) + v.reset(OpAMD64MOVLstore) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg3(v0, val, mem) return true } - // match: (MOVLstore [off] {sym} (ADDQ ptr idx) val mem) - // cond: ptr.Op != OpSB - // result: (MOVLstoreidx1 [off] {sym} ptr idx val mem) + // match: (MOVLstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVLstore [i0+i1] {s0} (LEAQ8 [0] {s1} x y) val mem) for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQ { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ8 { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - val := v_1 - mem := v_2 - if !(ptr.Op != OpSB) { - continue - } - v.reset(OpAMD64MOVLstoreidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + val := v_1 + mem := v_2 + if !(i1 != 0 && is32Bit(i0+i1)) { + break } - break + v.reset(OpAMD64MOVLstore) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg3(v0, val, mem) + return true } - // match: (MOVLstore [i] {s} p (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVQstore [i-4] {s} p w mem) + // match: (MOVLstore [i] {s} p1 (SHRQconst [32] w) x:(MOVLstore [i-4] {s} p0 w mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // result: (MOVQstore [i-4] {s} p0 w mem) for { i := v.AuxInt s := v.Aux - p := v_0 + p1 := v_0 if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 32 { break } @@ -13072,22 +12182,23 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { break } mem := x.Args[2] - if p != x.Args[0] || w != 
x.Args[1] || !(x.Uses == 1 && clobber(x)) { + p0 := x.Args[0] + if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVQstore) v.AuxInt = i - 4 v.Aux = s - v.AddArg3(p, w, mem) + v.AddArg3(p0, w, mem) return true } - // match: (MOVLstore [i] {s} p (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p w0:(SHRQconst [j-32] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVQstore [i-4] {s} p w0 mem) + // match: (MOVLstore [i] {s} p1 (SHRQconst [j] w) x:(MOVLstore [i-4] {s} p0 w0:(SHRQconst [j-32] w) mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // result: (MOVQstore [i-4] {s} p0 w0 mem) for { i := v.AuxInt s := v.Aux - p := v_0 + p1 := v_0 if v_1.Op != OpAMD64SHRQconst { break } @@ -13098,17 +12209,15 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { break } mem := x.Args[2] - if p != x.Args[0] { - break - } + p0 := x.Args[0] w0 := x.Args[1] - if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { break } v.reset(OpAMD64MOVQstore) v.AuxInt = i - 4 v.Aux = s - v.AddArg3(p, w0, mem) + v.AddArg3(p0, w0, mem) return true } // match: (MOVLstore [i] {s} p x1:(MOVLload [j] {s2} p2 mem) mem2:(MOVLstore [i-4] {s} p x2:(MOVLload [j-4] {s2} p2 mem) mem)) @@ -13785,113 +12894,52 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool { v.AddArg2(ptr, mem) return true } - // match: (MOVLstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem) - // cond: canMergeSym(sym1, sym2) - // result: (MOVLstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) + // match: (MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) + // result: (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) for { - x := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ1 { + c := v.AuxInt + s := v.Aux + p1 := v_0 + x := v_1 + if x.Op != OpAMD64MOVLstoreconst { break } - off := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(canMergeSym(sym1, sym2)) { + a := x.AuxInt + if x.Aux != s { break } - v.reset(OpAMD64MOVLstoreconstidx1) - v.AuxInt = ValAndOff(x).add(off) - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) + mem := x.Args[1] + p0 := x.Args[0] + if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) { + break + } + v.reset(OpAMD64MOVQstore) + v.AuxInt = ValAndOff(a).Off() + v.Aux = s + v0 := b.NewValue0(x.Pos, OpAMD64MOVQconst, typ.UInt64) + v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32 + v.AddArg3(p0, v0, mem) return true } - // match: (MOVLstoreconst [x] {sym1} (LEAQ4 [off] {sym2} ptr idx) mem) - // cond: canMergeSym(sym1, sym2) - // result: (MOVLstoreconstidx4 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) + // match: (MOVLstoreconst [a] {s} p1 x:(MOVLstoreconst [c] {s} p0 mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) + // result: (MOVQstore [ValAndOff(a).Off()] {s} p0 (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) for { - x := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ4 { + a := v.AuxInt + s := v.Aux + p1 := v_0 + x := v_1 + if x.Op != OpAMD64MOVLstoreconst { break } - 
off := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVLstoreconstidx4) - v.AuxInt = ValAndOff(x).add(off) - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVLstoreconst [x] {sym} (ADDQ ptr idx) mem) - // result: (MOVLstoreconstidx1 [x] {sym} ptr idx mem) - for { - x := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQ { - break - } - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - v.reset(OpAMD64MOVLstoreconstidx1) - v.AuxInt = x - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) - // result: (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) - for { - c := v.AuxInt - s := v.Aux - p := v_0 - x := v_1 - if x.Op != OpAMD64MOVLstoreconst { - break - } - a := x.AuxInt - if x.Aux != s { - break - } - mem := x.Args[1] - if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) { - break - } - v.reset(OpAMD64MOVQstore) - v.AuxInt = ValAndOff(a).Off() - v.Aux = s - v0 := b.NewValue0(x.Pos, OpAMD64MOVQconst, typ.UInt64) - v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32 - v.AddArg3(p, v0, mem) - return true - } - // match: (MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) - // result: (MOVQstore [ValAndOff(a).Off()] {s} p (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) - for { - a := v.AuxInt - s := v.Aux - p := v_0 - x := v_1 - if x.Op != OpAMD64MOVLstoreconst { - break - } - c := x.AuxInt - if x.Aux != s { + c := x.AuxInt + if x.Aux != s { break } mem := x.Args[1] - if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) { + p0 := x.Args[0] + if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) { break } v.reset(OpAMD64MOVQstore) @@ -13899,7 +12947,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool { v.Aux = s v0 := b.NewValue0(x.Pos, OpAMD64MOVQconst, typ.UInt64) v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32 - v.AddArg3(p, v0, mem) + v.AddArg3(p0, v0, mem) return true } // match: (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) @@ -13947,667 +12995,451 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64MOVOload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MOVLstoreconstidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) - // result: (MOVLstoreconstidx4 [c] {sym} ptr idx mem) + // match: (MOVOload [off1] {sym} (ADDQconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) + // result: (MOVOload [off1+off2] {sym} ptr mem) for { - c := v.AuxInt + off1 := v.AuxInt sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 2 { - continue - } - idx := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVLstoreconstidx4) - v.AuxInt = c - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true + if v_0.Op != OpAMD64ADDQconst { + break } - break - } - // match: 
(MOVLstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem) - // cond: ValAndOff(x).canAdd(c) - // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) - for { - x := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64ADDQconst { - continue - } - c := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(ValAndOff(x).canAdd(c)) { - continue - } - v.reset(OpAMD64MOVLstoreconstidx1) - v.AuxInt = ValAndOff(x).add(c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1 + off2)) { + break } - break + v.reset(OpAMD64MOVOload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg2(ptr, mem) + return true } - // match: (MOVLstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem) - // cond: ValAndOff(x).canAdd(c) - // result: (MOVLstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) + // match: (MOVOload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVOload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { - x := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - continue - } - c := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(ValAndOff(x).canAdd(c)) { - continue - } - v.reset(OpAMD64MOVLstoreconstidx1) - v.AuxInt = ValAndOff(x).add(c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != OpAMD64LEAQ { + break } - break - } - // match: (MOVLstoreconstidx1 [c] {s} p i x:(MOVLstoreconstidx1 [a] {s} p i mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) - // result: (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p i (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) - for { - c := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - i := v_1 - x := v_2 - if x.Op != OpAMD64MOVLstoreconstidx1 { - continue - } - a := x.AuxInt - if x.Aux != s { - continue - } - mem := x.Args[2] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || i != x_1 || !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) { - continue - } - v.reset(OpAMD64MOVQstoreidx1) - v.AuxInt = ValAndOff(a).Off() - v.Aux = s - v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) - v0.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32 - v.AddArg4(p, i, v0, mem) - return true - } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break } - break + v.reset(OpAMD64MOVOload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(base, mem) + return true } return false } -func rewriteValueAMD64_OpAMD64MOVLstoreconstidx4(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVOstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + config := b.Func.Config typ := &b.Func.Config.Types - // match: (MOVLstoreconstidx4 [x] {sym} (ADDQconst [c] ptr) idx mem) - // cond: ValAndOff(x).canAdd(c) - // result: (MOVLstoreconstidx4 [ValAndOff(x).add(c)] {sym} ptr idx mem) + // match: (MOVOstore [off1] {sym} (ADDQconst [off2] ptr) val mem) + // cond: is32Bit(off1+off2) + // result: (MOVOstore [off1+off2] {sym} ptr val mem) for { - x := v.AuxInt + off1 := v.AuxInt sym := v.Aux if v_0.Op != 
OpAMD64ADDQconst { break } - c := v_0.AuxInt + off2 := v_0.AuxInt ptr := v_0.Args[0] - idx := v_1 + val := v_1 mem := v_2 - if !(ValAndOff(x).canAdd(c)) { + if !(is32Bit(off1 + off2)) { break } - v.reset(OpAMD64MOVLstoreconstidx4) - v.AuxInt = ValAndOff(x).add(c) + v.reset(OpAMD64MOVOstore) + v.AuxInt = off1 + off2 v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.AddArg3(ptr, val, mem) return true } - // match: (MOVLstoreconstidx4 [x] {sym} ptr (ADDQconst [c] idx) mem) - // cond: ValAndOff(x).canAdd(4*c) - // result: (MOVLstoreconstidx4 [ValAndOff(x).add(4*c)] {sym} ptr idx mem) + // match: (MOVOstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVOstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - x := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != OpAMD64LEAQ { break } - c := v_1.AuxInt - idx := v_1.Args[0] + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v_1 mem := v_2 - if !(ValAndOff(x).canAdd(4 * c)) { + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVLstoreconstidx4) - v.AuxInt = ValAndOff(x).add(4 * c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.reset(OpAMD64MOVOstore) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(base, val, mem) return true } - // match: (MOVLstoreconstidx4 [c] {s} p i x:(MOVLstoreconstidx4 [a] {s} p i mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 4 == ValAndOff(c).Off() && clobber(x) - // result: (MOVQstoreidx1 [ValAndOff(a).Off()] {s} p (SHLQconst [2] i) (MOVQconst [ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32]) mem) + // match: (MOVOstore [dstOff] {dstSym} ptr (MOVOload [srcOff] {srcSym} (SB) _) mem) + // cond: symIsRO(srcSym) + // result: (MOVQstore [dstOff+8] {dstSym} ptr (MOVQconst [int64(read64(srcSym, srcOff+8, config.ctxt.Arch.ByteOrder))]) (MOVQstore [dstOff] {dstSym} ptr (MOVQconst [int64(read64(srcSym, srcOff, config.ctxt.Arch.ByteOrder))]) mem)) for { - c := v.AuxInt - s := v.Aux - p := v_0 - i := v_1 - x := v_2 - if x.Op != OpAMD64MOVLstoreconstidx4 { + dstOff := v.AuxInt + dstSym := v.Aux + ptr := v_0 + if v_1.Op != OpAMD64MOVOload { break } - a := x.AuxInt - if x.Aux != s { + srcOff := v_1.AuxInt + srcSym := v_1.Aux + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpSB { break } - mem := x.Args[2] - if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+4 == ValAndOff(c).Off() && clobber(x)) { + mem := v_2 + if !(symIsRO(srcSym)) { break } - v.reset(OpAMD64MOVQstoreidx1) - v.AuxInt = ValAndOff(a).Off() - v.Aux = s - v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, i.Type) - v0.AuxInt = 2 - v0.AddArg(i) - v1 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64) - v1.AuxInt = ValAndOff(a).Val()&0xffffffff | ValAndOff(c).Val()<<32 - v.AddArg4(p, v0, v1, mem) + v.reset(OpAMD64MOVQstore) + v.AuxInt = dstOff + 8 + v.Aux = dstSym + v0 := b.NewValue0(v_1.Pos, OpAMD64MOVQconst, typ.UInt64) + v0.AuxInt = int64(read64(srcSym, srcOff+8, config.ctxt.Arch.ByteOrder)) + v1 := b.NewValue0(v_1.Pos, OpAMD64MOVQstore, types.TypeMem) + v1.AuxInt = dstOff + v1.Aux = dstSym + v2 := b.NewValue0(v_1.Pos, OpAMD64MOVQconst, typ.UInt64) + v2.AuxInt = int64(read64(srcSym, srcOff, config.ctxt.Arch.ByteOrder)) + v1.AddArg3(ptr, v2, mem) + v.AddArg3(ptr, v0, v1) return true } return false } -func rewriteValueAMD64_OpAMD64MOVLstoreidx1(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func 
rewriteValueAMD64_OpAMD64MOVQatomicload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (MOVLstoreidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem) - // result: (MOVLstoreidx4 [c] {sym} ptr idx val mem) + // match: (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) + // result: (MOVQatomicload [off1+off2] {sym} ptr mem) for { - c := v.AuxInt + off1 := v.AuxInt sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 2 { - continue - } - idx := v_1.Args[0] - val := v_2 - mem := v_3 - v.reset(OpAMD64MOVLstoreidx4) - v.AuxInt = c - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true + if v_0.Op != OpAMD64ADDQconst { + break } - break - } - // match: (MOVLstoreidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem) - // result: (MOVLstoreidx8 [c] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 { - continue - } - idx := v_1.Args[0] - val := v_2 - mem := v_3 - v.reset(OpAMD64MOVLstoreidx8) - v.AuxInt = c - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1 + off2)) { + break } - break + v.reset(OpAMD64MOVQatomicload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg2(ptr, mem) + return true } - // match: (MOVLstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem) - // cond: is32Bit(c+d) - // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem) + // match: (MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVQatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64ADDQconst { - continue - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { - continue - } - v.reset(OpAMD64MOVLstoreidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - break - } - // match: (MOVLstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem) - // cond: is32Bit(c+d) - // result: (MOVLstoreidx1 [c+d] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - continue - } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { - continue - } - v.reset(OpAMD64MOVLstoreidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != OpAMD64LEAQ { + break } - break - } - // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx1 [i-4] {s} p idx w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVQstoreidx1 [i-4] {s} p idx w mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - if v_2.Op != OpAMD64SHRQconst || v_2.AuxInt != 32 { - continue - } - w := v_2.Args[0] - x := v_3 - if x.Op != OpAMD64MOVLstoreidx1 || x.AuxInt != i-4 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(OpAMD64MOVQstoreidx1) - v.AuxInt 
= i - 4 - v.Aux = s - v.AddArg4(p, idx, w, mem) - return true - } + off2 := v_0.AuxInt + sym2 := v_0.Aux + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break } - break + v.reset(OpAMD64MOVQatomicload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(ptr, mem) + return true } - // match: (MOVLstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx1 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVQstoreidx1 [i-4] {s} p idx w0 mem) + return false +} +func rewriteValueAMD64_OpAMD64MOVQf2i(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (MOVQf2i (Arg [off] {sym})) + // cond: t.Size() == u.Size() + // result: @b.Func.Entry (Arg [off] {sym}) for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - if v_2.Op != OpAMD64SHRQconst { - continue - } - j := v_2.AuxInt - w := v_2.Args[0] - x := v_3 - if x.Op != OpAMD64MOVLstoreidx1 || x.AuxInt != i-4 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 { - continue - } - w0 := x.Args[2] - if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(OpAMD64MOVQstoreidx1) - v.AuxInt = i - 4 - v.Aux = s - v.AddArg4(p, idx, w0, mem) - return true - } + t := v.Type + if v_0.Op != OpArg { + break } - break - } - // match: (MOVLstoreidx1 [i] {s} p (MOVQconst [c]) w mem) - // cond: is32Bit(i+c) - // result: (MOVLstore [i+c] {s} p w mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - if v_1.Op != OpAMD64MOVQconst { - continue - } - c := v_1.AuxInt - w := v_2 - mem := v_3 - if !(is32Bit(i + c)) { - continue - } - v.reset(OpAMD64MOVLstore) - v.AuxInt = i + c - v.Aux = s - v.AddArg3(p, w, mem) - return true + u := v_0.Type + off := v_0.AuxInt + sym := v_0.Aux + if !(t.Size() == u.Size()) { + break } - break + b = b.Func.Entry + v0 := b.NewValue0(v.Pos, OpArg, t) + v.copyOf(v0) + v0.AuxInt = off + v0.Aux = sym + return true } - // match: (MOVLstoreidx1 [off] {s} ptr idx (MOVQconst [c]) mem) - // cond: validValAndOff(int64(int32(c)), off) - // result: (MOVLstoreconstidx1 [makeValAndOff(int64(int32(c)), off)] {s} ptr idx mem) + return false +} +func rewriteValueAMD64_OpAMD64MOVQi2f(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (MOVQi2f (Arg [off] {sym})) + // cond: t.Size() == u.Size() + // result: @b.Func.Entry (Arg [off] {sym}) for { - off := v.AuxInt - s := v.Aux - ptr := v_0 - idx := v_1 - if v_2.Op != OpAMD64MOVQconst { + t := v.Type + if v_0.Op != OpArg { break } - c := v_2.AuxInt - mem := v_3 - if !(validValAndOff(int64(int32(c)), off)) { + u := v_0.Type + off := v_0.AuxInt + sym := v_0.Aux + if !(t.Size() == u.Size()) { break } - v.reset(OpAMD64MOVLstoreconstidx1) - v.AuxInt = makeValAndOff(int64(int32(c)), off) - v.Aux = s - v.AddArg3(ptr, idx, mem) + b = b.Func.Entry + v0 := b.NewValue0(v.Pos, OpArg, t) + v.copyOf(v0) + v0.AuxInt = off + v0.Aux = sym return true } return false } -func rewriteValueAMD64_OpAMD64MOVLstoreidx4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (MOVLstoreidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem) - // cond: is32Bit(c+d) - // result: (MOVLstoreidx4 
[c+d] {sym} ptr idx val mem) + config := b.Func.Config + // match: (MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _)) + // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // result: x for { - c := v.AuxInt + off := v.AuxInt sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { + ptr := v_0 + if v_1.Op != OpAMD64MOVQstore { break } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { + off2 := v_1.AuxInt + sym2 := v_1.Aux + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { break } - v.reset(OpAMD64MOVLstoreidx4) - v.AuxInt = c + d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) + v.copyOf(x) return true } - // match: (MOVLstoreidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem) - // cond: is32Bit(c+4*d) - // result: (MOVLstoreidx4 [c+4*d] {sym} ptr idx val mem) + // match: (MOVQload [off1] {sym} (ADDQconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) + // result: (MOVQload [off1+off2] {sym} ptr mem) for { - c := v.AuxInt + off1 := v.AuxInt sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { + if v_0.Op != OpAMD64ADDQconst { break } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - if !(is32Bit(c + 4*d)) { + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1 + off2)) { break } - v.reset(OpAMD64MOVLstoreidx4) - v.AuxInt = c + 4*d + v.reset(OpAMD64MOVQload) + v.AuxInt = off1 + off2 v.Aux = sym - v.AddArg4(ptr, idx, val, mem) + v.AddArg2(ptr, mem) return true } - // match: (MOVLstoreidx4 [i] {s} p idx (SHRQconst [32] w) x:(MOVLstoreidx4 [i-4] {s} p idx w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVQstoreidx1 [i-4] {s} p (SHLQconst [2] idx) w mem) + // match: (MOVQload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { - i := v.AuxInt - s := v.Aux - p := v_0 - idx := v_1 - if v_2.Op != OpAMD64SHRQconst || v_2.AuxInt != 32 { - break - } - w := v_2.Args[0] - x := v_3 - if x.Op != OpAMD64MOVLstoreidx4 || x.AuxInt != i-4 || x.Aux != s { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != OpAMD64LEAQ { break } - mem := x.Args[3] - if p != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) { + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVQstoreidx1) - v.AuxInt = i - 4 - v.Aux = s - v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type) - v0.AuxInt = 2 - v0.AddArg(idx) - v.AddArg4(p, v0, w, mem) + v.reset(OpAMD64MOVQload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(base, mem) return true } - // match: (MOVLstoreidx4 [i] {s} p idx (SHRQconst [j] w) x:(MOVLstoreidx4 [i-4] {s} p idx w0:(SHRQconst [j-32] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVQstoreidx1 [i-4] {s} p (SHLQconst [2] idx) w0 mem) + // match: (MOVQload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVQload [i0+i1] {s0} (LEAQ1 [0] {s1} x y) mem) for { - i := v.AuxInt - s := v.Aux - p := v_0 - idx := v_1 - if v_2.Op != OpAMD64SHRQconst { - break - } - j := v_2.AuxInt - w := v_2.Args[0] - x := v_3 - if x.Op != OpAMD64MOVLstoreidx4 || x.AuxInt != i-4 || x.Aux != s { - break - } - mem := x.Args[3] - if p != x.Args[0] || idx != x.Args[1] { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ1 { break } - w0 := x.Args[2] 
- if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-32 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + mem := v_1 + if !(i1 != 0 && is32Bit(i0+i1)) { break } - v.reset(OpAMD64MOVQstoreidx1) - v.AuxInt = i - 4 - v.Aux = s - v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type) - v0.AuxInt = 2 - v0.AddArg(idx) - v.AddArg4(p, v0, w0, mem) + v.reset(OpAMD64MOVQload) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg2(v0, mem) return true } - // match: (MOVLstoreidx4 [i] {s} p (MOVQconst [c]) w mem) - // cond: is32Bit(i+4*c) - // result: (MOVLstore [i+4*c] {s} p w mem) + // match: (MOVQload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVQload [i0+i1] {s0} (LEAQ2 [0] {s1} x y) mem) for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64MOVQconst { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ2 { break } - c := v_1.AuxInt - w := v_2 - mem := v_3 - if !(is32Bit(i + 4*c)) { + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + mem := v_1 + if !(i1 != 0 && is32Bit(i0+i1)) { break } - v.reset(OpAMD64MOVLstore) - v.AuxInt = i + 4*c - v.Aux = s - v.AddArg3(p, w, mem) + v.reset(OpAMD64MOVQload) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg2(v0, mem) return true } - // match: (MOVLstoreidx4 [off] {s} ptr idx (MOVQconst [c]) mem) - // cond: validValAndOff(int64(int32(c)), off) - // result: (MOVLstoreconstidx4 [makeValAndOff(int64(int32(c)), off)] {s} ptr idx mem) + // match: (MOVQload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVQload [i0+i1] {s0} (LEAQ4 [0] {s1} x y) mem) for { - off := v.AuxInt - s := v.Aux - ptr := v_0 - idx := v_1 - if v_2.Op != OpAMD64MOVQconst { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ4 { break } - c := v_2.AuxInt - mem := v_3 - if !(validValAndOff(int64(int32(c)), off)) { + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + mem := v_1 + if !(i1 != 0 && is32Bit(i0+i1)) { break } - v.reset(OpAMD64MOVLstoreconstidx4) - v.AuxInt = makeValAndOff(int64(int32(c)), off) - v.Aux = s - v.AddArg3(ptr, idx, mem) + v.reset(OpAMD64MOVQload) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg2(v0, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVLstoreidx8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVLstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem) - // cond: is32Bit(c+d) - // result: (MOVLstoreidx8 [c+d] {sym} ptr idx val mem) + // match: (MOVQload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVQload [i0+i1] {s0} (LEAQ8 [0] {s1} x y) mem) for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ8 { break } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + mem := v_1 + if !(i1 != 0 && is32Bit(i0+i1)) { break } - v.reset(OpAMD64MOVLstoreidx8) - v.AuxInt = c + d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) + v.reset(OpAMD64MOVQload) + v.AuxInt = i0 
+ i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg2(v0, mem) return true } - // match: (MOVLstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem) - // cond: is32Bit(c+8*d) - // result: (MOVLstoreidx8 [c+8*d] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - if !(is32Bit(c + 8*d)) { - break - } - v.reset(OpAMD64MOVLstoreidx8) - v.AuxInt = c + 8*d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVLstoreidx8 [i] {s} p (MOVQconst [c]) w mem) - // cond: is32Bit(i+8*c) - // result: (MOVLstore [i+8*c] {s} p w mem) + // match: (MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem) + // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) + // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64MOVQconst { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != OpAMD64LEAL { break } - c := v_1.AuxInt - w := v_2 - mem := v_3 - if !(is32Bit(i + 8*c)) { + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) { break } - v.reset(OpAMD64MOVLstore) - v.AuxInt = i + 8*c - v.Aux = s - v.AddArg3(p, w, mem) + v.reset(OpAMD64MOVQload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(base, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVOload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVOload [off1] {sym} (ADDQconst [off2] ptr) mem) + // match: (MOVQload [off1] {sym} (ADDLconst [off2] ptr) mem) // cond: is32Bit(off1+off2) - // result: (MOVOload [off1+off2] {sym} ptr mem) + // result: (MOVQload [off1+off2] {sym} ptr mem) for { off1 := v.AuxInt sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { + if v_0.Op != OpAMD64ADDLconst { break } off2 := v_0.AuxInt @@ -14616,46 +13448,52 @@ func rewriteValueAMD64_OpAMD64MOVOload(v *Value) bool { if !(is32Bit(off1 + off2)) { break } - v.reset(OpAMD64MOVOload) + v.reset(OpAMD64MOVQload) v.AuxInt = off1 + off2 v.Aux = sym v.AddArg2(ptr, mem) return true } - // match: (MOVOload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVOload [off1+off2] {mergeSym(sym1,sym2)} base mem) + // match: (MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _)) + // result: (MOVQf2i val) for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != OpAMD64MOVSDstore || v_1.AuxInt != off || v_1.Aux != sym { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + val := v_1.Args[1] + if ptr != v_1.Args[0] { break } - v.reset(OpAMD64MOVOload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(base, mem) + v.reset(OpAMD64MOVQf2i) + v.AddArg(val) + return true + } + // match: (MOVQload [off] {sym} (SB) _) + // cond: symIsRO(sym) + // result: (MOVQconst [int64(read64(sym, off, config.ctxt.Arch.ByteOrder))]) + for { + off := v.AuxInt + sym := v.Aux + if v_0.Op != OpSB || !(symIsRO(sym)) { + break + } + v.reset(OpAMD64MOVQconst) + v.AuxInt = int64(read64(sym, off, config.ctxt.Arch.ByteOrder)) return true } return false } -func rewriteValueAMD64_OpAMD64MOVOstore(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVQstore(v 
*Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - config := b.Func.Config - typ := &b.Func.Config.Types - // match: (MOVOstore [off1] {sym} (ADDQconst [off2] ptr) val mem) + // match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem) // cond: is32Bit(off1+off2) - // result: (MOVOstore [off1+off2] {sym} ptr val mem) + // result: (MOVQstore [off1+off2] {sym} ptr val mem) for { off1 := v.AuxInt sym := v.Aux @@ -14669,15 +13507,36 @@ func rewriteValueAMD64_OpAMD64MOVOstore(v *Value) bool { if !(is32Bit(off1 + off2)) { break } - v.reset(OpAMD64MOVOstore) + v.reset(OpAMD64MOVQstore) v.AuxInt = off1 + off2 v.Aux = sym v.AddArg3(ptr, val, mem) return true } - // match: (MOVOstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // match: (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) + // cond: validValAndOff(c,off) + // result: (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := v_1.AuxInt + mem := v_2 + if !(validValAndOff(c, off)) { + break + } + v.reset(OpAMD64MOVQstoreconst) + v.AuxInt = makeValAndOff(c, off) + v.Aux = sym + v.AddArg2(ptr, mem) + return true + } + // match: (MOVQstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVOstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { off1 := v.AuxInt sym1 := v.Aux @@ -14692,3760 +13551,906 @@ func rewriteValueAMD64_OpAMD64MOVOstore(v *Value) bool { if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVOstore) + v.reset(OpAMD64MOVQstore) v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) v.AddArg3(base, val, mem) return true } - // match: (MOVOstore [dstOff] {dstSym} ptr (MOVOload [srcOff] {srcSym} (SB) _) mem) - // cond: symIsRO(srcSym) - // result: (MOVQstore [dstOff+8] {dstSym} ptr (MOVQconst [int64(read64(srcSym, srcOff+8, config.ctxt.Arch.ByteOrder))]) (MOVQstore [dstOff] {dstSym} ptr (MOVQconst [int64(read64(srcSym, srcOff, config.ctxt.Arch.ByteOrder))]) mem)) + // match: (MOVQstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVQstore [i0+i1] {s0} (LEAQ1 [0] {s1} x y) val mem) for { - dstOff := v.AuxInt - dstSym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64MOVOload { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ1 { break } - srcOff := v_1.AuxInt - srcSym := v_1.Aux - v_1_0 := v_1.Args[0] - if v_1_0.Op != OpSB { + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + val := v_1 + mem := v_2 + if !(i1 != 0 && is32Bit(i0+i1)) { + break + } + v.reset(OpAMD64MOVQstore) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg3(v0, val, mem) + return true + } + // match: (MOVQstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVQstore [i0+i1] {s0} (LEAQ2 [0] {s1} x y) val mem) + for { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ2 { break } + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + val := v_1 mem := v_2 - if !(symIsRO(srcSym)) { + if !(i1 != 0 && is32Bit(i0+i1)) { break } v.reset(OpAMD64MOVQstore) - v.AuxInt = dstOff + 8 - v.Aux = dstSym - v0 := b.NewValue0(v_1.Pos, OpAMD64MOVQconst, typ.UInt64) - v0.AuxInt = int64(read64(srcSym, srcOff+8, 
config.ctxt.Arch.ByteOrder)) - v1 := b.NewValue0(v_1.Pos, OpAMD64MOVQstore, types.TypeMem) - v1.AuxInt = dstOff - v1.Aux = dstSym - v2 := b.NewValue0(v_1.Pos, OpAMD64MOVQconst, typ.UInt64) - v2.AuxInt = int64(read64(srcSym, srcOff, config.ctxt.Arch.ByteOrder)) - v1.AddArg3(ptr, v2, mem) - v.AddArg3(ptr, v0, v1) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg3(v0, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVQatomicload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) - // cond: is32Bit(off1+off2) - // result: (MOVQatomicload [off1+off2] {sym} ptr mem) + // match: (MOVQstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVQstore [i0+i1] {s0} (LEAQ4 [0] {s1} x y) val mem) for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ4 { break } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1 + off2)) { + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + val := v_1 + mem := v_2 + if !(i1 != 0 && is32Bit(i0+i1)) { break } - v.reset(OpAMD64MOVQatomicload) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg2(ptr, mem) + v.reset(OpAMD64MOVQstore) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg3(v0, val, mem) return true } - // match: (MOVQatomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVQatomicload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) + // match: (MOVQstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVQstore [i0+i1] {s0} (LEAQ8 [0] {s1} x y) val mem) + for { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ8 { + break + } + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + val := v_1 + mem := v_2 + if !(i1 != 0 && is32Bit(i0+i1)) { + break + } + v.reset(OpAMD64MOVQstore) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg3(v0, val, mem) + return true + } + // match: (MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) + // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) + // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { off1 := v.AuxInt sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ { + if v_0.Op != OpAMD64LEAL { break } off2 := v_0.AuxInt sym2 := v_0.Aux - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) { break } - v.reset(OpAMD64MOVQatomicload) + v.reset(OpAMD64MOVQstore) v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) - v.AddArg2(ptr, mem) + v.AddArg3(base, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVQf2i(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (MOVQf2i (Arg [off] {sym})) - // cond: t.Size() == u.Size() - // result: @b.Func.Entry (Arg [off] {sym}) + // match: (MOVQstore [off1] {sym} (ADDLconst [off2] ptr) val mem) + // cond: is32Bit(off1+off2) + // result: (MOVQstore [off1+off2] {sym} ptr val mem) for { - t := v.Type - if v_0.Op != OpArg { + off1 := 
v.AuxInt + sym := v.Aux + if v_0.Op != OpAMD64ADDLconst { break } - u := v_0.Type - off := v_0.AuxInt - sym := v_0.Aux - if !(t.Size() == u.Size()) { + off2 := v_0.AuxInt + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1 + off2)) { break } - b = b.Func.Entry - v0 := b.NewValue0(v.Pos, OpArg, t) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym + v.reset(OpAMD64MOVQstore) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg3(ptr, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVQi2f(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (MOVQi2f (Arg [off] {sym})) - // cond: t.Size() == u.Size() - // result: @b.Func.Entry (Arg [off] {sym}) - for { - t := v.Type - if v_0.Op != OpArg { - break - } - u := v_0.Type - off := v_0.AuxInt - sym := v_0.Aux - if !(t.Size() == u.Size()) { - break - } - b = b.Func.Entry - v0 := b.NewValue0(v.Pos, OpArg, t) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVQload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVQload [off] {sym} ptr (MOVQstore [off2] {sym2} ptr2 x _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) - // result: x + // match: (MOVQstore {sym} [off] ptr y:(ADDQload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ADDQmodify [off] {sym} ptr x mem) for { off := v.AuxInt sym := v.Aux ptr := v_0 - if v_1.Op != OpAMD64MOVQstore { + y := v_1 + if y.Op != OpAMD64ADDQload || y.AuxInt != off || y.Aux != sym { break } - off2 := v_1.AuxInt - sym2 := v_1.Aux - x := v_1.Args[1] - ptr2 := v_1.Args[0] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { + mem := y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { break } - v.copyOf(x) + v.reset(OpAMD64ADDQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, x, mem) return true } - // match: (MOVQload [off1] {sym} (ADDQconst [off2] ptr) mem) - // cond: is32Bit(off1+off2) - // result: (MOVQload [off1+off2] {sym} ptr mem) + // match: (MOVQstore {sym} [off] ptr y:(ANDQload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ANDQmodify [off] {sym} ptr x mem) for { - off1 := v.AuxInt + off := v.AuxInt sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { + ptr := v_0 + y := v_1 + if y.Op != OpAMD64ANDQload || y.AuxInt != off || y.Aux != sym { break } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1 + off2)) { + mem := y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { break } - v.reset(OpAMD64MOVQload) - v.AuxInt = off1 + off2 + v.reset(OpAMD64ANDQmodify) + v.AuxInt = off v.Aux = sym - v.AddArg2(ptr, mem) - return true - } - // match: (MOVQload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVQload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(base, mem) + v.AddArg3(ptr, x, mem) return true } - // match: (MOVQload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVQloadidx1 [off1+off2] 
{mergeSym(sym1,sym2)} ptr idx mem) + // match: (MOVQstore {sym} [off] ptr y:(ORQload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (ORQmodify [off] {sym} ptr x mem) for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ1 { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + y := v_1 + if y.Op != OpAMD64ORQload || y.AuxInt != off || y.Aux != sym { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + mem := y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { break } - v.reset(OpAMD64MOVQloadidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) + v.reset(OpAMD64ORQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, x, mem) return true } - // match: (MOVQload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVQloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) + // match: (MOVQstore {sym} [off] ptr y:(XORQload x [off] {sym} ptr mem) mem) + // cond: y.Uses==1 && clobber(y) + // result: (XORQmodify [off] {sym} ptr x mem) for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ8 { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + y := v_1 + if y.Op != OpAMD64XORQload || y.AuxInt != off || y.Aux != sym { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + mem := y.Args[2] + x := y.Args[0] + if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { break } - v.reset(OpAMD64MOVQloadidx8) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) + v.reset(OpAMD64XORQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, x, mem) return true } - // match: (MOVQload [off] {sym} (ADDQ ptr idx) mem) - // cond: ptr.Op != OpSB - // result: (MOVQloadidx1 [off] {sym} ptr idx mem) + // match: (MOVQstore {sym} [off] ptr y:(ADDQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (ADDQmodify [off] {sym} ptr x mem) for { off := v.AuxInt sym := v.Aux - if v_0.Op != OpAMD64ADDQ { + ptr := v_0 + y := v_1 + if y.Op != OpAMD64ADDQ { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - mem := v_1 - if !(ptr.Op != OpSB) { + _ = y.Args[1] + y_0 := y.Args[0] + y_1 := y.Args[1] + for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { + l := y_0 + if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { + continue + } + mem := l.Args[1] + if ptr != l.Args[0] { + continue + } + x := y_1 + if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { continue } - v.reset(OpAMD64MOVQloadidx1) + v.reset(OpAMD64ADDQmodify) v.AuxInt = off v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.AddArg3(ptr, x, mem) return true } break } - // match: (MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem) - // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) - // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem) + // match: (MOVQstore {sym} [off] ptr y:(SUBQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (SUBQmodify [off] {sym} ptr x mem) for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAL { - break - } - off2 := v_0.AuxInt - 
sym2 := v_0.Aux - base := v_0.Args[0] - mem := v_1 - if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + y := v_1 + if y.Op != OpAMD64SUBQ { break } - v.reset(OpAMD64MOVQload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(base, mem) - return true - } - // match: (MOVQload [off1] {sym} (ADDLconst [off2] ptr) mem) - // cond: is32Bit(off1+off2) - // result: (MOVQload [off1+off2] {sym} ptr mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDLconst { + x := y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { break } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1 + off2)) { + mem := l.Args[1] + if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { break } - v.reset(OpAMD64MOVQload) - v.AuxInt = off1 + off2 + v.reset(OpAMD64SUBQmodify) + v.AuxInt = off v.Aux = sym - v.AddArg2(ptr, mem) + v.AddArg3(ptr, x, mem) return true } - // match: (MOVQload [off] {sym} ptr (MOVSDstore [off] {sym} ptr val _)) - // result: (MOVQf2i val) + // match: (MOVQstore {sym} [off] ptr y:(ANDQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (ANDQmodify [off] {sym} ptr x mem) for { off := v.AuxInt sym := v.Aux ptr := v_0 - if v_1.Op != OpAMD64MOVSDstore || v_1.AuxInt != off || v_1.Aux != sym { - break - } - val := v_1.Args[1] - if ptr != v_1.Args[0] { - break - } - v.reset(OpAMD64MOVQf2i) - v.AddArg(val) - return true - } - // match: (MOVQload [off] {sym} (SB) _) - // cond: symIsRO(sym) - // result: (MOVQconst [int64(read64(sym, off, config.ctxt.Arch.ByteOrder))]) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != OpSB || !(symIsRO(sym)) { + y := v_1 + if y.Op != OpAMD64ANDQ { break } - v.reset(OpAMD64MOVQconst) - v.AuxInt = int64(read64(sym, off, config.ctxt.Arch.ByteOrder)) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVQloadidx1(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVQloadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) - // result: (MOVQloadidx8 [c] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 { + _ = y.Args[1] + y_0 := y.Args[0] + y_1 := y.Args[1] + for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { + l := y_0 + if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { continue } - idx := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVQloadidx8) - v.AuxInt = c - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVQloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem) - // cond: is32Bit(c+d) - // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64ADDQconst { + mem := l.Args[1] + if ptr != l.Args[0] { continue } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(is32Bit(c + d)) { + x := y_1 + if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { continue } - v.reset(OpAMD64MOVQloadidx1) - v.AuxInt = c + d + v.reset(OpAMD64ANDQmodify) + v.AuxInt = off v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.AddArg3(ptr, x, mem) return true } break } - // match: (MOVQloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem) - // cond: is32Bit(c+d) - // result: (MOVQloadidx1 [c+d] {sym} ptr idx mem) + // 
match: (MOVQstore {sym} [off] ptr y:(ORQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (ORQmodify [off] {sym} ptr x mem) for { - c := v.AuxInt + off := v.AuxInt sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { + ptr := v_0 + y := v_1 + if y.Op != OpAMD64ORQ { + break + } + _ = y.Args[1] + y_0 := y.Args[0] + y_1 := y.Args[1] + for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { + l := y_0 + if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { continue } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(is32Bit(c + d)) { + mem := l.Args[1] + if ptr != l.Args[0] { continue } - v.reset(OpAMD64MOVQloadidx1) - v.AuxInt = c + d + x := y_1 + if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + continue + } + v.reset(OpAMD64ORQmodify) + v.AuxInt = off v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.AddArg3(ptr, x, mem) return true } break } - // match: (MOVQloadidx1 [i] {s} p (MOVQconst [c]) mem) - // cond: is32Bit(i+c) - // result: (MOVQload [i+c] {s} p mem) + // match: (MOVQstore {sym} [off] ptr y:(XORQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (XORQmodify [off] {sym} ptr x mem) for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - if v_1.Op != OpAMD64MOVQconst { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + y := v_1 + if y.Op != OpAMD64XORQ { + break + } + _ = y.Args[1] + y_0 := y.Args[0] + y_1 := y.Args[1] + for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { + l := y_0 + if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { continue } - c := v_1.AuxInt - mem := v_2 - if !(is32Bit(i + c)) { + mem := l.Args[1] + if ptr != l.Args[0] { continue } - v.reset(OpAMD64MOVQload) - v.AuxInt = i + c - v.Aux = s - v.AddArg2(p, mem) + x := y_1 + if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + continue + } + v.reset(OpAMD64XORQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, x, mem) return true } break } - return false -} -func rewriteValueAMD64_OpAMD64MOVQloadidx8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVQloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem) - // cond: is32Bit(c+d) - // result: (MOVQloadidx8 [c+d] {sym} ptr idx mem) + // match: (MOVQstore {sym} [off] ptr y:(BTCQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (BTCQmodify [off] {sym} ptr x mem) for { - c := v.AuxInt + off := v.AuxInt sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { + ptr := v_0 + y := v_1 + if y.Op != OpAMD64BTCQ { break } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(is32Bit(c + d)) { + x := y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { break } - v.reset(OpAMD64MOVQloadidx8) - v.AuxInt = c + d + mem := l.Args[1] + if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + break + } + v.reset(OpAMD64BTCQmodify) + v.AuxInt = off v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.AddArg3(ptr, x, mem) return true } - // match: (MOVQloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem) - // cond: is32Bit(c+8*d) - // result: (MOVQloadidx8 [c+8*d] {sym} ptr idx mem) + // match: (MOVQstore {sym} [off] ptr y:(BTRQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (BTRQmodify 
[off] {sym} ptr x mem) for { - c := v.AuxInt + off := v.AuxInt sym := v.Aux ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { + y := v_1 + if y.Op != OpAMD64BTRQ { break } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(is32Bit(c + 8*d)) { + x := y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { break } - v.reset(OpAMD64MOVQloadidx8) - v.AuxInt = c + 8*d + mem := l.Args[1] + if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + break + } + v.reset(OpAMD64BTRQmodify) + v.AuxInt = off v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.AddArg3(ptr, x, mem) return true } - // match: (MOVQloadidx8 [i] {s} p (MOVQconst [c]) mem) - // cond: is32Bit(i+8*c) - // result: (MOVQload [i+8*c] {s} p mem) + // match: (MOVQstore {sym} [off] ptr y:(BTSQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) + // result: (BTSQmodify [off] {sym} ptr x mem) for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64MOVQconst { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + y := v_1 + if y.Op != OpAMD64BTSQ { break } - c := v_1.AuxInt - mem := v_2 - if !(is32Bit(i + 8*c)) { + x := y.Args[1] + l := y.Args[0] + if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { break } - v.reset(OpAMD64MOVQload) - v.AuxInt = i + 8*c - v.Aux = s - v.AddArg2(p, mem) + mem := l.Args[1] + if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { + break + } + v.reset(OpAMD64BTSQmodify) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, x, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem) - // cond: is32Bit(off1+off2) - // result: (MOVQstore [off1+off2] {sym} ptr val mem) + // match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a) + // result: (ADDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) for { - off1 := v.AuxInt + off := v.AuxInt sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { + ptr := v_0 + a := v_1 + if a.Op != OpAMD64ADDQconst { break } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1 + off2)) { + c := a.AuxInt + l := a.Args[0] + if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { break } - v.reset(OpAMD64MOVQstore) - v.AuxInt = off1 + off2 + mem := l.Args[1] + ptr2 := l.Args[0] + if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) { + break + } + v.reset(OpAMD64ADDQconstmodify) + v.AuxInt = makeValAndOff(c, off) v.Aux = sym - v.AddArg3(ptr, val, mem) + v.AddArg2(ptr, mem) return true } - // match: (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) - // cond: validValAndOff(c,off) - // result: (MOVQstoreconst [makeValAndOff(c,off)] {sym} ptr mem) + // match: (MOVQstore [off] {sym} ptr a:(ANDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a) + // result: (ANDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) for { off := v.AuxInt sym := v.Aux ptr := v_0 - if v_1.Op != OpAMD64MOVQconst { + a := v_1 + if a.Op != OpAMD64ANDQconst { break } - c := v_1.AuxInt - mem := v_2 - if !(validValAndOff(c, off)) { + c := a.AuxInt + l := a.Args[0] + if l.Op != 
OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { break } - v.reset(OpAMD64MOVQstoreconst) + mem := l.Args[1] + ptr2 := l.Args[0] + if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) { + break + } + v.reset(OpAMD64ANDQconstmodify) v.AuxInt = makeValAndOff(c, off) v.Aux = sym v.AddArg2(ptr, mem) return true } - // match: (MOVQstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // match: (MOVQstore [off] {sym} ptr a:(ORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a) + // result: (ORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + a := v_1 + if a.Op != OpAMD64ORQconst { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVQstore) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(base, val, mem) - return true - } - // match: (MOVQstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVQstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ1 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVQstoreidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVQstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVQstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ8 { + c := a.AuxInt + l := a.Args[0] + if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + mem := l.Args[1] + ptr2 := l.Args[0] + if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) { break } - v.reset(OpAMD64MOVQstoreidx8) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) + v.reset(OpAMD64ORQconstmodify) + v.AuxInt = makeValAndOff(c, off) + v.Aux = sym + v.AddArg2(ptr, mem) return true } - // match: (MOVQstore [off] {sym} (ADDQ ptr idx) val mem) - // cond: ptr.Op != OpSB - // result: (MOVQstoreidx1 [off] {sym} ptr idx val mem) + // match: (MOVQstore [off] {sym} ptr a:(XORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a) + // result: (XORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) for { off := v.AuxInt sym := v.Aux - if v_0.Op != OpAMD64ADDQ { + ptr := v_0 + a := v_1 + if a.Op != OpAMD64XORQconst { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx 
:= v_0_1 - val := v_1 - mem := v_2 - if !(ptr.Op != OpSB) { - continue - } - v.reset(OpAMD64MOVQstoreidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - break - } - // match: (MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) - // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) - // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAL { + c := a.AuxInt + l := a.Args[0] + if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) { + mem := l.Args[1] + ptr2 := l.Args[0] + if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) { break } - v.reset(OpAMD64MOVQstore) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(base, val, mem) + v.reset(OpAMD64XORQconstmodify) + v.AuxInt = makeValAndOff(c, off) + v.Aux = sym + v.AddArg2(ptr, mem) return true } - // match: (MOVQstore [off1] {sym} (ADDLconst [off2] ptr) val mem) - // cond: is32Bit(off1+off2) - // result: (MOVQstore [off1+off2] {sym} ptr val mem) + // match: (MOVQstore [off] {sym} ptr a:(BTCQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a) + // result: (BTCQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) for { - off1 := v.AuxInt + off := v.AuxInt sym := v.Aux - if v_0.Op != OpAMD64ADDLconst { + ptr := v_0 + a := v_1 + if a.Op != OpAMD64BTCQconst { break } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1 + off2)) { + c := a.AuxInt + l := a.Args[0] + if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { break } - v.reset(OpAMD64MOVQstore) - v.AuxInt = off1 + off2 + mem := l.Args[1] + ptr2 := l.Args[0] + if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) { + break + } + v.reset(OpAMD64BTCQconstmodify) + v.AuxInt = makeValAndOff(c, off) v.Aux = sym - v.AddArg3(ptr, val, mem) + v.AddArg2(ptr, mem) return true } - // match: (MOVQstore {sym} [off] ptr y:(ADDQload x [off] {sym} ptr mem) mem) - // cond: y.Uses==1 && clobber(y) - // result: (ADDQmodify [off] {sym} ptr x mem) + // match: (MOVQstore [off] {sym} ptr a:(BTRQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a) + // result: (BTRQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) for { off := v.AuxInt sym := v.Aux ptr := v_0 - y := v_1 - if y.Op != OpAMD64ADDQload || y.AuxInt != off || y.Aux != sym { + a := v_1 + if a.Op != OpAMD64BTRQconst { break } - mem := y.Args[2] - x := y.Args[0] - if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { + c := a.AuxInt + l := a.Args[0] + if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { break } - v.reset(OpAMD64ADDQmodify) - v.AuxInt = off + mem := l.Args[1] + ptr2 := l.Args[0] + if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) { + break + } + v.reset(OpAMD64BTRQconstmodify) + v.AuxInt = makeValAndOff(c, off) v.Aux = sym - v.AddArg3(ptr, x, mem) + v.AddArg2(ptr, mem) return true } - // match: (MOVQstore {sym} [off] ptr y:(ANDQload x [off] {sym} ptr mem) mem) - // cond: y.Uses==1 && clobber(y) 
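A note on the *modify family that dominates these hunks: each rule collapses a load, an ALU operation, and a store to the same {sym}[off] address into a single memory-destination instruction, and the y.Uses==1 / l.Uses==1 conditions guarantee the intermediate values have no other consumers before clobber erases them. A minimal sketch of Go code whose MOVQload/ORQ/MOVQstore triple such a rule would fuse (orInto is an illustrative name, not from the patch):

package main

// orInto ORs x into *p in place. On amd64 the store-fusion rules in
// this hunk can match the load/OR/store triple generated for the body
// and emit one read-modify-write instruction, ORQ x, (p), via ORQmodify.
func orInto(p *uint64, x uint64) {
	*p |= x
}

func main() {
	v := uint64(0b1010)
	orInto(&v, 0b0101)
	println(v) // 15
}
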
- // result: (ANDQmodify [off] {sym} ptr x mem) + // match: (MOVQstore [off] {sym} ptr a:(BTSQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) + // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a) + // result: (BTSQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) for { off := v.AuxInt sym := v.Aux ptr := v_0 - y := v_1 - if y.Op != OpAMD64ANDQload || y.AuxInt != off || y.Aux != sym { + a := v_1 + if a.Op != OpAMD64BTSQconst { break } - mem := y.Args[2] - x := y.Args[0] - if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { + c := a.AuxInt + l := a.Args[0] + if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { break } - v.reset(OpAMD64ANDQmodify) - v.AuxInt = off + mem := l.Args[1] + ptr2 := l.Args[0] + if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) { + break + } + v.reset(OpAMD64BTSQconstmodify) + v.AuxInt = makeValAndOff(c, off) v.Aux = sym - v.AddArg3(ptr, x, mem) + v.AddArg2(ptr, mem) return true } - // match: (MOVQstore {sym} [off] ptr y:(ORQload x [off] {sym} ptr mem) mem) - // cond: y.Uses==1 && clobber(y) - // result: (ORQmodify [off] {sym} ptr x mem) + // match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem) + // result: (MOVSDstore [off] {sym} ptr val mem) for { off := v.AuxInt sym := v.Aux ptr := v_0 - y := v_1 - if y.Op != OpAMD64ORQload || y.AuxInt != off || y.Aux != sym { - break - } - mem := y.Args[2] - x := y.Args[0] - if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { + if v_1.Op != OpAMD64MOVQf2i { break } - v.reset(OpAMD64ORQmodify) + val := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64MOVSDstore) v.AuxInt = off v.Aux = sym - v.AddArg3(ptr, x, mem) + v.AddArg3(ptr, val, mem) return true } - // match: (MOVQstore {sym} [off] ptr y:(XORQload x [off] {sym} ptr mem) mem) - // cond: y.Uses==1 && clobber(y) - // result: (XORQmodify [off] {sym} ptr x mem) + return false +} +func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem) + // cond: ValAndOff(sc).canAdd(off) + // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - y := v_1 - if y.Op != OpAMD64XORQload || y.AuxInt != off || y.Aux != sym { + sc := v.AuxInt + s := v.Aux + if v_0.Op != OpAMD64ADDQconst { break } - mem := y.Args[2] - x := y.Args[0] - if ptr != y.Args[1] || mem != v_2 || !(y.Uses == 1 && clobber(y)) { + off := v_0.AuxInt + ptr := v_0.Args[0] + mem := v_1 + if !(ValAndOff(sc).canAdd(off)) { break } - v.reset(OpAMD64XORQmodify) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, x, mem) + v.reset(OpAMD64MOVQstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = s + v.AddArg2(ptr, mem) return true } - // match: (MOVQstore {sym} [off] ptr y:(ADDQ l:(MOVQload [off] {sym} ptr mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (ADDQmodify [off] {sym} ptr x mem) + // match: (MOVQstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) + // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - y := v_1 - if y.Op != OpAMD64ADDQ { + sc := v.AuxInt + sym1 := v.Aux + if v_0.Op != OpAMD64LEAQ { break } - _ = y.Args[1] - y_0 := y.Args[0] - y_1 := y.Args[1] - for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = 
_i0+1, y_1, y_0 { - l := y_0 - if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { - continue - } - mem := l.Args[1] - if ptr != l.Args[0] { - continue - } - x := y_1 - if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - continue - } - v.reset(OpAMD64ADDQmodify) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, x, mem) - return true - } - break - } - // match: (MOVQstore {sym} [off] ptr y:(SUBQ l:(MOVQload [off] {sym} ptr mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (SUBQmodify [off] {sym} ptr x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - y := v_1 - if y.Op != OpAMD64SUBQ { - break - } - x := y.Args[1] - l := y.Args[0] - if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { - break - } - mem := l.Args[1] - if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - break - } - v.reset(OpAMD64SUBQmodify) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, x, mem) - return true - } - // match: (MOVQstore {sym} [off] ptr y:(ANDQ l:(MOVQload [off] {sym} ptr mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (ANDQmodify [off] {sym} ptr x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - y := v_1 - if y.Op != OpAMD64ANDQ { - break - } - _ = y.Args[1] - y_0 := y.Args[0] - y_1 := y.Args[1] - for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { - l := y_0 - if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { - continue - } - mem := l.Args[1] - if ptr != l.Args[0] { - continue - } - x := y_1 - if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - continue - } - v.reset(OpAMD64ANDQmodify) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, x, mem) - return true - } - break - } - // match: (MOVQstore {sym} [off] ptr y:(ORQ l:(MOVQload [off] {sym} ptr mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (ORQmodify [off] {sym} ptr x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - y := v_1 - if y.Op != OpAMD64ORQ { - break - } - _ = y.Args[1] - y_0 := y.Args[0] - y_1 := y.Args[1] - for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { - l := y_0 - if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { - continue - } - mem := l.Args[1] - if ptr != l.Args[0] { - continue - } - x := y_1 - if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - continue - } - v.reset(OpAMD64ORQmodify) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, x, mem) - return true - } - break - } - // match: (MOVQstore {sym} [off] ptr y:(XORQ l:(MOVQload [off] {sym} ptr mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (XORQmodify [off] {sym} ptr x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - y := v_1 - if y.Op != OpAMD64XORQ { - break - } - _ = y.Args[1] - y_0 := y.Args[0] - y_1 := y.Args[1] - for _i0 := 0; _i0 <= 1; _i0, y_0, y_1 = _i0+1, y_1, y_0 { - l := y_0 - if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { - continue - } - mem := l.Args[1] - if ptr != l.Args[0] { - continue - } - x := y_1 - if mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - continue - } - v.reset(OpAMD64XORQmodify) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, x, mem) - return true - } - break - } - // match: (MOVQstore {sym} [off] ptr y:(BTCQ l:(MOVQload [off] {sym} ptr mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (BTCQmodify [off] {sym} ptr x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - y := v_1 - if y.Op != 
OpAMD64BTCQ { - break - } - x := y.Args[1] - l := y.Args[0] - if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { - break - } - mem := l.Args[1] - if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - break - } - v.reset(OpAMD64BTCQmodify) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, x, mem) - return true - } - // match: (MOVQstore {sym} [off] ptr y:(BTRQ l:(MOVQload [off] {sym} ptr mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (BTRQmodify [off] {sym} ptr x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - y := v_1 - if y.Op != OpAMD64BTRQ { - break - } - x := y.Args[1] - l := y.Args[0] - if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { - break - } - mem := l.Args[1] - if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - break - } - v.reset(OpAMD64BTRQmodify) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, x, mem) - return true - } - // match: (MOVQstore {sym} [off] ptr y:(BTSQ l:(MOVQload [off] {sym} ptr mem) x) mem) - // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (BTSQmodify [off] {sym} ptr x mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - y := v_1 - if y.Op != OpAMD64BTSQ { - break - } - x := y.Args[1] - l := y.Args[0] - if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { - break - } - mem := l.Args[1] - if ptr != l.Args[0] || mem != v_2 || !(y.Uses == 1 && l.Uses == 1 && clobber(y, l)) { - break - } - v.reset(OpAMD64BTSQmodify) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, x, mem) - return true - } - // match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a) - // result: (ADDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - a := v_1 - if a.Op != OpAMD64ADDQconst { - break - } - c := a.AuxInt - l := a.Args[0] - if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { - break - } - mem := l.Args[1] - ptr2 := l.Args[0] - if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) { - break - } - v.reset(OpAMD64ADDQconstmodify) - v.AuxInt = makeValAndOff(c, off) - v.Aux = sym - v.AddArg2(ptr, mem) - return true - } - // match: (MOVQstore [off] {sym} ptr a:(ANDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a) - // result: (ANDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - a := v_1 - if a.Op != OpAMD64ANDQconst { - break - } - c := a.AuxInt - l := a.Args[0] - if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { - break - } - mem := l.Args[1] - ptr2 := l.Args[0] - if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) { - break - } - v.reset(OpAMD64ANDQconstmodify) - v.AuxInt = makeValAndOff(c, off) - v.Aux = sym - v.AddArg2(ptr, mem) - return true - } - // match: (MOVQstore [off] {sym} ptr a:(ORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a) - // result: (ORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - a := v_1 - if a.Op != OpAMD64ORQconst { - break - } - c := 
a.AuxInt - l := a.Args[0] - if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { - break - } - mem := l.Args[1] - ptr2 := l.Args[0] - if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) { - break - } - v.reset(OpAMD64ORQconstmodify) - v.AuxInt = makeValAndOff(c, off) - v.Aux = sym - v.AddArg2(ptr, mem) - return true - } - // match: (MOVQstore [off] {sym} ptr a:(XORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a) - // result: (XORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - a := v_1 - if a.Op != OpAMD64XORQconst { - break - } - c := a.AuxInt - l := a.Args[0] - if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { - break - } - mem := l.Args[1] - ptr2 := l.Args[0] - if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) { - break - } - v.reset(OpAMD64XORQconstmodify) - v.AuxInt = makeValAndOff(c, off) - v.Aux = sym - v.AddArg2(ptr, mem) - return true - } - // match: (MOVQstore [off] {sym} ptr a:(BTCQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a) - // result: (BTCQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - a := v_1 - if a.Op != OpAMD64BTCQconst { - break - } - c := a.AuxInt - l := a.Args[0] - if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { - break - } - mem := l.Args[1] - ptr2 := l.Args[0] - if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) { - break - } - v.reset(OpAMD64BTCQconstmodify) - v.AuxInt = makeValAndOff(c, off) - v.Aux = sym - v.AddArg2(ptr, mem) - return true - } - // match: (MOVQstore [off] {sym} ptr a:(BTRQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a) - // result: (BTRQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - a := v_1 - if a.Op != OpAMD64BTRQconst { - break - } - c := a.AuxInt - l := a.Args[0] - if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { - break - } - mem := l.Args[1] - ptr2 := l.Args[0] - if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) { - break - } - v.reset(OpAMD64BTRQconstmodify) - v.AuxInt = makeValAndOff(c, off) - v.Aux = sym - v.AddArg2(ptr, mem) - return true - } - // match: (MOVQstore [off] {sym} ptr a:(BTSQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) - // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l, a) - // result: (BTSQconstmodify {sym} [makeValAndOff(c,off)] ptr mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - a := v_1 - if a.Op != OpAMD64BTSQconst { - break - } - c := a.AuxInt - l := a.Args[0] - if l.Op != OpAMD64MOVQload || l.AuxInt != off || l.Aux != sym { - break - } - mem := l.Args[1] - ptr2 := l.Args[0] - if mem != v_2 || !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l, a)) { - break - } - v.reset(OpAMD64BTSQconstmodify) - v.AuxInt = makeValAndOff(c, off) - v.Aux = sym - v.AddArg2(ptr, mem) - return true - } - // match: (MOVQstore [off] {sym} ptr 
(MOVQf2i val) mem) - // result: (MOVSDstore [off] {sym} ptr val mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64MOVQf2i { - break - } - val := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVSDstore) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, val, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVQstoreconst [sc] {s} (ADDQconst [off] ptr) mem) - // cond: ValAndOff(sc).canAdd(off) - // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) - for { - sc := v.AuxInt - s := v.Aux - if v_0.Op != OpAMD64ADDQconst { - break - } - off := v_0.AuxInt - ptr := v_0.Args[0] - mem := v_1 - if !(ValAndOff(sc).canAdd(off)) { - break - } - v.reset(OpAMD64MOVQstoreconst) - v.AuxInt = ValAndOff(sc).add(off) - v.Aux = s - v.AddArg2(ptr, mem) - return true - } - // match: (MOVQstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) - // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) - // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) - for { - sc := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ { - break - } - off := v_0.AuxInt - sym2 := v_0.Aux - ptr := v_0.Args[0] - mem := v_1 - if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) { - break - } - v.reset(OpAMD64MOVQstoreconst) - v.AuxInt = ValAndOff(sc).add(off) - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(ptr, mem) - return true - } - // match: (MOVQstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem) - // cond: canMergeSym(sym1, sym2) - // result: (MOVQstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) - for { - x := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ1 { - break - } - off := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVQstoreconstidx1) - v.AuxInt = ValAndOff(x).add(off) - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVQstoreconst [x] {sym1} (LEAQ8 [off] {sym2} ptr idx) mem) - // cond: canMergeSym(sym1, sym2) - // result: (MOVQstoreconstidx8 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) - for { - x := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ8 { - break - } - off := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVQstoreconstidx8) - v.AuxInt = ValAndOff(x).add(off) - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVQstoreconst [x] {sym} (ADDQ ptr idx) mem) - // result: (MOVQstoreconstidx1 [x] {sym} ptr idx mem) - for { - x := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQ { - break - } - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - v.reset(OpAMD64MOVQstoreconstidx1) - v.AuxInt = x - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [c2] {s} p mem)) - // cond: config.useSSE && x.Uses == 1 && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x) - // result: (MOVOstore [ValAndOff(c2).Off()] {s} p (MOVOconst [0]) mem) - for { - c := v.AuxInt - s := v.Aux - p := v_0 - x := v_1 - if x.Op != OpAMD64MOVQstoreconst { - break - } - c2 := x.AuxInt - if x.Aux != s { - break - } - mem := x.Args[1] - if p != x.Args[0] || !(config.useSSE && x.Uses 
== 1 && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) { - break - } - v.reset(OpAMD64MOVOstore) - v.AuxInt = ValAndOff(c2).Off() - v.Aux = s - v0 := b.NewValue0(x.Pos, OpAMD64MOVOconst, types.TypeInt128) - v0.AuxInt = 0 - v.AddArg3(p, v0, mem) - return true - } - // match: (MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) - // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) - // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) - for { - sc := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAL { - break - } - off := v_0.AuxInt - sym2 := v_0.Aux - ptr := v_0.Args[0] - mem := v_1 - if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) { - break - } - v.reset(OpAMD64MOVQstoreconst) - v.AuxInt = ValAndOff(sc).add(off) - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(ptr, mem) - return true - } - // match: (MOVQstoreconst [sc] {s} (ADDLconst [off] ptr) mem) - // cond: ValAndOff(sc).canAdd(off) - // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) - for { - sc := v.AuxInt - s := v.Aux - if v_0.Op != OpAMD64ADDLconst { - break - } - off := v_0.AuxInt - ptr := v_0.Args[0] - mem := v_1 - if !(ValAndOff(sc).canAdd(off)) { - break - } - v.reset(OpAMD64MOVQstoreconst) - v.AuxInt = ValAndOff(sc).add(off) - v.Aux = s - v.AddArg2(ptr, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVQstoreconstidx1(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVQstoreconstidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) - // result: (MOVQstoreconstidx8 [c] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 { - continue - } - idx := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVQstoreconstidx8) - v.AuxInt = c - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVQstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem) - // cond: ValAndOff(x).canAdd(c) - // result: (MOVQstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) - for { - x := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64ADDQconst { - continue - } - c := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(ValAndOff(x).canAdd(c)) { - continue - } - v.reset(OpAMD64MOVQstoreconstidx1) - v.AuxInt = ValAndOff(x).add(c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVQstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem) - // cond: ValAndOff(x).canAdd(c) - // result: (MOVQstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) - for { - x := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - continue - } - c := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(ValAndOff(x).canAdd(c)) { - continue - } - v.reset(OpAMD64MOVQstoreconstidx1) - v.AuxInt = ValAndOff(x).add(c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - return false -} -func rewriteValueAMD64_OpAMD64MOVQstoreconstidx8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVQstoreconstidx8 [x] {sym} (ADDQconst [c] ptr) idx mem) - // cond: ValAndOff(x).canAdd(c) - // result: (MOVQstoreconstidx8 [ValAndOff(x).add(c)] {sym} ptr idx mem) - for { - x := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { 
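The MOVQstoreconst rules in this region carry both the stored constant and the address offset in a single AuxInt, manipulated through the ValAndOff helpers (Val, Off, canAdd, add). A self-contained sketch of that encoding, mirroring the ssa package's layout of a 32-bit value in the upper half and a 32-bit offset in the lower half (the type and methods below are illustrative stand-ins for the real ones):

package main

import "fmt"

// valAndOff mimics ssa.ValAndOff: a 32-bit immediate in the upper
// half of an int64 and a 32-bit address offset in the lower half.
type valAndOff int64

func makeValAndOff(val, off int64) valAndOff {
	return valAndOff(val<<32 + int64(uint32(off)))
}

func (x valAndOff) Val() int64 { return int64(x) >> 32 }
func (x valAndOff) Off() int64 { return int64(int32(x)) }

// canAdd reports whether another addend still fits in the 32-bit
// offset field, mirroring the guard used by the folding rules.
func (x valAndOff) canAdd(off int64) bool {
	newoff := x.Off() + off
	return newoff == int64(int32(newoff))
}

func main() {
	vo := makeValAndOff(7, -16)
	fmt.Println(vo.Val(), vo.Off(), vo.canAdd(8)) // 7 -16 true
}

canAdd is what guards the ADDQconst and LEAQ folds above: an extra offset may only be absorbed while the sum still fits the 32-bit offset field. The same Off() arithmetic drives the two-adjacent-zero-stores-into-one-MOVOstore merge, which requires the second store to land at exactly Off()+8.
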
- break - } - c := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(ValAndOff(x).canAdd(c)) { - break - } - v.reset(OpAMD64MOVQstoreconstidx8) - v.AuxInt = ValAndOff(x).add(c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVQstoreconstidx8 [x] {sym} ptr (ADDQconst [c] idx) mem) - // cond: ValAndOff(x).canAdd(8*c) - // result: (MOVQstoreconstidx8 [ValAndOff(x).add(8*c)] {sym} ptr idx mem) - for { - x := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - c := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(ValAndOff(x).canAdd(8 * c)) { - break - } - v.reset(OpAMD64MOVQstoreconstidx8) - v.AuxInt = ValAndOff(x).add(8 * c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVQstoreidx1(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVQstoreidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem) - // result: (MOVQstoreidx8 [c] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 { - continue - } - idx := v_1.Args[0] - val := v_2 - mem := v_3 - v.reset(OpAMD64MOVQstoreidx8) - v.AuxInt = c - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - break - } - // match: (MOVQstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem) - // cond: is32Bit(c+d) - // result: (MOVQstoreidx1 [c+d] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64ADDQconst { - continue - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { - continue - } - v.reset(OpAMD64MOVQstoreidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - break - } - // match: (MOVQstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem) - // cond: is32Bit(c+d) - // result: (MOVQstoreidx1 [c+d] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - continue - } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { - continue - } - v.reset(OpAMD64MOVQstoreidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - break - } - // match: (MOVQstoreidx1 [i] {s} p (MOVQconst [c]) w mem) - // cond: is32Bit(i+c) - // result: (MOVQstore [i+c] {s} p w mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - if v_1.Op != OpAMD64MOVQconst { - continue - } - c := v_1.AuxInt - w := v_2 - mem := v_3 - if !(is32Bit(i + c)) { - continue - } - v.reset(OpAMD64MOVQstore) - v.AuxInt = i + c - v.Aux = s - v.AddArg3(p, w, mem) - return true - } - break - } - // match: (MOVQstoreidx1 [off] {s} ptr idx (MOVQconst [c]) mem) - // cond: validValAndOff(c, off) - // result: (MOVQstoreconstidx1 [makeValAndOff(c, off)] {s} ptr idx mem) - for { - off := v.AuxInt - s := v.Aux - ptr := v_0 - idx := v_1 - if v_2.Op != OpAMD64MOVQconst { - break - } - c := v_2.AuxInt - mem := v_3 - if !(validValAndOff(c, off)) { - break - } - v.reset(OpAMD64MOVQstoreconstidx1) - v.AuxInt = makeValAndOff(c, off) - v.Aux = s - v.AddArg3(ptr, idx, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVQstoreidx8(v *Value) bool { - v_3 := v.Args[3] - 
v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVQstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem) - // cond: is32Bit(c+d) - // result: (MOVQstoreidx8 [c+d] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { - break - } - v.reset(OpAMD64MOVQstoreidx8) - v.AuxInt = c + d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVQstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem) - // cond: is32Bit(c+8*d) - // result: (MOVQstoreidx8 [c+8*d] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - if !(is32Bit(c + 8*d)) { - break - } - v.reset(OpAMD64MOVQstoreidx8) - v.AuxInt = c + 8*d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVQstoreidx8 [i] {s} p (MOVQconst [c]) w mem) - // cond: is32Bit(i+8*c) - // result: (MOVQstore [i+8*c] {s} p w mem) - for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := v_1.AuxInt - w := v_2 - mem := v_3 - if !(is32Bit(i + 8*c)) { - break - } - v.reset(OpAMD64MOVQstore) - v.AuxInt = i + 8*c - v.Aux = s - v.AddArg3(p, w, mem) - return true - } - // match: (MOVQstoreidx8 [off] {s} ptr idx (MOVQconst [c]) mem) - // cond: validValAndOff(c, off) - // result: (MOVQstoreconstidx8 [makeValAndOff(c, off)] {s} ptr idx mem) - for { - off := v.AuxInt - s := v.Aux - ptr := v_0 - idx := v_1 - if v_2.Op != OpAMD64MOVQconst { - break - } - c := v_2.AuxInt - mem := v_3 - if !(validValAndOff(c, off)) { - break - } - v.reset(OpAMD64MOVQstoreconstidx8) - v.AuxInt = makeValAndOff(c, off) - v.Aux = s - v.AddArg3(ptr, idx, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVSDload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSDload [off1] {sym} (ADDQconst [off2] ptr) mem) - // cond: is32Bit(off1+off2) - // result: (MOVSDload [off1+off2] {sym} ptr mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { - break - } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(OpAMD64MOVSDload) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg2(ptr, mem) - return true - } - // match: (MOVSDload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVSDload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(base, mem) - return true - } - // match: (MOVSDload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSDloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ1 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVSDloadidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) 
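For orientation amid the idx8 hunks: the rewrite functions dropped here folded constants into base+8*index addresses, a job the new addressingmodes pass now reaches through its LEAQ8 entries. A small Go loop whose float64 element accesses have exactly that base + 8*i shape (sum is an illustrative name):

package main

// sum loads xs[i] each iteration. The element address is base + 8*i,
// the shape that scaled-index ops such as MOVSDloadidx8 encode in a
// single memory operand: MOVSD (ptr)(idx*8), X0.
func sum(xs []float64) float64 {
	var s float64
	for i := 0; i < len(xs); i++ {
		s += xs[i]
	}
	return s
}

func main() {
	println(sum([]float64{1, 2, 3.5})) // +6.500000e+000
}
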
- return true - } - // match: (MOVSDload [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSDloadidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ8 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVSDloadidx8) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSDload [off] {sym} (ADDQ ptr idx) mem) - // cond: ptr.Op != OpSB - // result: (MOVSDloadidx1 [off] {sym} ptr idx mem) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQ { - break - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - mem := v_1 - if !(ptr.Op != OpSB) { - continue - } - v.reset(OpAMD64MOVSDloadidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _)) - // result: (MOVQi2f val) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64MOVQstore || v_1.AuxInt != off || v_1.Aux != sym { - break - } - val := v_1.Args[1] - if ptr != v_1.Args[0] { - break - } - v.reset(OpAMD64MOVQi2f) - v.AddArg(val) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVSDloadidx1(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSDloadidx1 [c] {sym} ptr (SHLQconst [3] idx) mem) - // result: (MOVSDloadidx8 [c] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 { - break - } - idx := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVSDloadidx8) - v.AuxInt = c - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSDloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem) - // cond: is32Bit(c+d) - // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(is32Bit(c + d)) { - break - } - v.reset(OpAMD64MOVSDloadidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSDloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem) - // cond: is32Bit(c+d) - // result: (MOVSDloadidx1 [c+d] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(is32Bit(c + d)) { - break - } - v.reset(OpAMD64MOVSDloadidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSDloadidx1 [i] {s} p (MOVQconst [c]) mem) - // cond: is32Bit(i+c) - // result: (MOVSDload [i+c] {s} p mem) - for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := v_1.AuxInt - mem := v_2 - if !(is32Bit(i + c)) { - break - } - v.reset(OpAMD64MOVSDload) - v.AuxInt = i + c - v.Aux = s - v.AddArg2(p, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVSDloadidx8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSDloadidx8 [c] {sym} (ADDQconst [d] ptr) idx mem) - // cond: is32Bit(c+d) - // result: (MOVSDloadidx8 [c+d] {sym} 
ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(is32Bit(c + d)) { - break - } - v.reset(OpAMD64MOVSDloadidx8) - v.AuxInt = c + d - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSDloadidx8 [c] {sym} ptr (ADDQconst [d] idx) mem) - // cond: is32Bit(c+8*d) - // result: (MOVSDloadidx8 [c+8*d] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(is32Bit(c + 8*d)) { - break - } - v.reset(OpAMD64MOVSDloadidx8) - v.AuxInt = c + 8*d - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSDloadidx8 [i] {s} p (MOVQconst [c]) mem) - // cond: is32Bit(i+8*c) - // result: (MOVSDload [i+8*c] {s} p mem) - for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := v_1.AuxInt - mem := v_2 - if !(is32Bit(i + 8*c)) { - break - } - v.reset(OpAMD64MOVSDload) - v.AuxInt = i + 8*c - v.Aux = s - v.AddArg2(p, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVSDstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSDstore [off1] {sym} (ADDQconst [off2] ptr) val mem) - // cond: is32Bit(off1+off2) - // result: (MOVSDstore [off1+off2] {sym} ptr val mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { - break - } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(OpAMD64MOVSDstore) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg3(ptr, val, mem) - return true - } - // match: (MOVSDstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVSDstore) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(base, val, mem) - return true - } - // match: (MOVSDstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSDstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ1 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVSDstoreidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSDstore [off1] {sym1} (LEAQ8 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSDstoreidx8 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ8 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVSDstoreidx8) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) - return true - } - // 
match: (MOVSDstore [off] {sym} (ADDQ ptr idx) val mem) - // cond: ptr.Op != OpSB - // result: (MOVSDstoreidx1 [off] {sym} ptr idx val mem) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQ { - break - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - val := v_1 - mem := v_2 - if !(ptr.Op != OpSB) { - continue - } - v.reset(OpAMD64MOVSDstoreidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - break - } - // match: (MOVSDstore [off] {sym} ptr (MOVQi2f val) mem) - // result: (MOVQstore [off] {sym} ptr val mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64MOVQi2f { - break - } - val := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVQstore) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, val, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVSDstoreidx1(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSDstoreidx1 [c] {sym} ptr (SHLQconst [3] idx) val mem) - // result: (MOVSDstoreidx8 [c] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 3 { - break - } - idx := v_1.Args[0] - val := v_2 - mem := v_3 - v.reset(OpAMD64MOVSDstoreidx8) - v.AuxInt = c - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSDstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem) - // cond: is32Bit(c+d) - // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { - break - } - v.reset(OpAMD64MOVSDstoreidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSDstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem) - // cond: is32Bit(c+d) - // result: (MOVSDstoreidx1 [c+d] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { - break - } - v.reset(OpAMD64MOVSDstoreidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSDstoreidx1 [i] {s} p (MOVQconst [c]) w mem) - // cond: is32Bit(i+c) - // result: (MOVSDstore [i+c] {s} p w mem) - for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := v_1.AuxInt - w := v_2 - mem := v_3 - if !(is32Bit(i + c)) { - break - } - v.reset(OpAMD64MOVSDstore) - v.AuxInt = i + c - v.Aux = s - v.AddArg3(p, w, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVSDstoreidx8(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSDstoreidx8 [c] {sym} (ADDQconst [d] ptr) idx val mem) - // cond: is32Bit(c+d) - // result: (MOVSDstoreidx8 [c+d] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { - break - } - v.reset(OpAMD64MOVSDstoreidx8) - v.AuxInt = c + d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSDstoreidx8 [c] {sym} ptr (ADDQconst [d] idx) val mem) - // cond: is32Bit(c+8*d) - // 
result: (MOVSDstoreidx8 [c+8*d] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - if !(is32Bit(c + 8*d)) { - break - } - v.reset(OpAMD64MOVSDstoreidx8) - v.AuxInt = c + 8*d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSDstoreidx8 [i] {s} p (MOVQconst [c]) w mem) - // cond: is32Bit(i+8*c) - // result: (MOVSDstore [i+8*c] {s} p w mem) - for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := v_1.AuxInt - w := v_2 - mem := v_3 - if !(is32Bit(i + 8*c)) { - break - } - v.reset(OpAMD64MOVSDstore) - v.AuxInt = i + 8*c - v.Aux = s - v.AddArg3(p, w, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVSSload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSSload [off1] {sym} (ADDQconst [off2] ptr) mem) - // cond: is32Bit(off1+off2) - // result: (MOVSSload [off1+off2] {sym} ptr mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { - break - } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(OpAMD64MOVSSload) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg2(ptr, mem) - return true - } - // match: (MOVSSload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVSSload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(base, mem) - return true - } - // match: (MOVSSload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSSloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ1 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVSSloadidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSSload [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSSloadidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ4 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVSSloadidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSSload [off] {sym} (ADDQ ptr idx) mem) - // cond: ptr.Op != OpSB - // result: (MOVSSloadidx1 [off] {sym} ptr idx mem) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQ { - break - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - mem := v_1 - if !(ptr.Op != OpSB) { - continue - } - v.reset(OpAMD64MOVSSloadidx1) - v.AuxInt = off - v.Aux = sym - 
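The MOVSSload-of-MOVLstore rule just below forwards a freshly stored 32-bit integer straight into an XMM register (MOVLi2f), skipping the memory round trip. This is the shape that math.Float32frombits-style code produces; a sketch under that assumption (bitsToFloat is an illustrative name):

package main

import (
	"math"
	"unsafe"
)

// bitsToFloat writes the uint32 bits to memory and reloads them as a
// float32. The MOVSSload-of-MOVLstore rule forwards the stored value
// directly from the integer register to an XMM register (MOVLi2f),
// eliminating the store/load pair.
func bitsToFloat(b uint32) float32 {
	return *(*float32)(unsafe.Pointer(&b))
}

func main() {
	println(bitsToFloat(math.Float32bits(1.5))) // +1.500000e+000
}
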
v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _)) - // result: (MOVLi2f val) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64MOVLstore || v_1.AuxInt != off || v_1.Aux != sym { - break - } - val := v_1.Args[1] - if ptr != v_1.Args[0] { - break - } - v.reset(OpAMD64MOVLi2f) - v.AddArg(val) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVSSloadidx1(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSSloadidx1 [c] {sym} ptr (SHLQconst [2] idx) mem) - // result: (MOVSSloadidx4 [c] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 2 { - break - } - idx := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVSSloadidx4) - v.AuxInt = c - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSSloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem) - // cond: is32Bit(c+d) - // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(is32Bit(c + d)) { - break - } - v.reset(OpAMD64MOVSSloadidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSSloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem) - // cond: is32Bit(c+d) - // result: (MOVSSloadidx1 [c+d] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(is32Bit(c + d)) { - break - } - v.reset(OpAMD64MOVSSloadidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSSloadidx1 [i] {s} p (MOVQconst [c]) mem) - // cond: is32Bit(i+c) - // result: (MOVSSload [i+c] {s} p mem) - for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := v_1.AuxInt - mem := v_2 - if !(is32Bit(i + c)) { - break - } - v.reset(OpAMD64MOVSSload) - v.AuxInt = i + c - v.Aux = s - v.AddArg2(p, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVSSloadidx4(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSSloadidx4 [c] {sym} (ADDQconst [d] ptr) idx mem) - // cond: is32Bit(c+d) - // result: (MOVSSloadidx4 [c+d] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(is32Bit(c + d)) { - break - } - v.reset(OpAMD64MOVSSloadidx4) - v.AuxInt = c + d - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSSloadidx4 [c] {sym} ptr (ADDQconst [d] idx) mem) - // cond: is32Bit(c+4*d) - // result: (MOVSSloadidx4 [c+4*d] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(is32Bit(c + 4*d)) { - break - } - v.reset(OpAMD64MOVSSloadidx4) - v.AuxInt = c + 4*d - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVSSloadidx4 [i] {s} p (MOVQconst [c]) mem) - // cond: is32Bit(i+4*c) - // result: (MOVSSload [i+4*c] {s} p mem) - for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := v_1.AuxInt - mem := v_2 - if !(is32Bit(i + 4*c)) { - break - } - v.reset(OpAMD64MOVSSload) - v.AuxInt = i + 4*c - 
v.Aux = s - v.AddArg2(p, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVSSstore(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSSstore [off1] {sym} (ADDQconst [off2] ptr) val mem) - // cond: is32Bit(off1+off2) - // result: (MOVSSstore [off1+off2] {sym} ptr val mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { - break - } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(OpAMD64MOVSSstore) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg3(ptr, val, mem) - return true - } - // match: (MOVSSstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVSSstore) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(base, val, mem) - return true - } - // match: (MOVSSstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSSstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ1 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVSSstoreidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSSstore [off1] {sym1} (LEAQ4 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVSSstoreidx4 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ4 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVSSstoreidx4) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSSstore [off] {sym} (ADDQ ptr idx) val mem) - // cond: ptr.Op != OpSB - // result: (MOVSSstoreidx1 [off] {sym} ptr idx val mem) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQ { - break - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - val := v_1 - mem := v_2 - if !(ptr.Op != OpSB) { - continue - } - v.reset(OpAMD64MOVSSstoreidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - break - } - // match: (MOVSSstore [off] {sym} ptr (MOVLi2f val) mem) - // result: (MOVLstore [off] {sym} ptr val mem) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64MOVLi2f { - break - } - val := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVLstore) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, val, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVSSstoreidx1(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: 
(MOVSSstoreidx1 [c] {sym} ptr (SHLQconst [2] idx) val mem) - // result: (MOVSSstoreidx4 [c] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 2 { - break - } - idx := v_1.Args[0] - val := v_2 - mem := v_3 - v.reset(OpAMD64MOVSSstoreidx4) - v.AuxInt = c - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSSstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem) - // cond: is32Bit(c+d) - // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { - break - } - v.reset(OpAMD64MOVSSstoreidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSSstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem) - // cond: is32Bit(c+d) - // result: (MOVSSstoreidx1 [c+d] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { - break - } - v.reset(OpAMD64MOVSSstoreidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSSstoreidx1 [i] {s} p (MOVQconst [c]) w mem) - // cond: is32Bit(i+c) - // result: (MOVSSstore [i+c] {s} p w mem) - for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := v_1.AuxInt - w := v_2 - mem := v_3 - if !(is32Bit(i + c)) { - break - } - v.reset(OpAMD64MOVSSstore) - v.AuxInt = i + c - v.Aux = s - v.AddArg3(p, w, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVSSstoreidx4(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVSSstoreidx4 [c] {sym} (ADDQconst [d] ptr) idx val mem) - // cond: is32Bit(c+d) - // result: (MOVSSstoreidx4 [c+d] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { - break - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { - break - } - v.reset(OpAMD64MOVSSstoreidx4) - v.AuxInt = c + d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSSstoreidx4 [c] {sym} ptr (ADDQconst [d] idx) val mem) - // cond: is32Bit(c+4*d) - // result: (MOVSSstoreidx4 [c+4*d] {sym} ptr idx val mem) - for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - if !(is32Bit(c + 4*d)) { - break - } - v.reset(OpAMD64MOVSSstoreidx4) - v.AuxInt = c + 4*d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - // match: (MOVSSstoreidx4 [i] {s} p (MOVQconst [c]) w mem) - // cond: is32Bit(i+4*c) - // result: (MOVSSstore [i+4*c] {s} p w mem) - for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := v_1.AuxInt - w := v_2 - mem := v_3 - if !(is32Bit(i + 4*c)) { - break - } - v.reset(OpAMD64MOVSSstore) - v.AuxInt = i + 4*c - v.Aux = s - v.AddArg3(p, w, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVWQSX(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (MOVWQSX x:(MOVWload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWQSXload [off] {sym} ptr mem) - for { - x := v_0 - if x.Op != OpAMD64MOVWload { - 
break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg2(ptr, mem) - return true - } - // match: (MOVWQSX x:(MOVLload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWQSXload [off] {sym} ptr mem) - for { - x := v_0 - if x.Op != OpAMD64MOVLload { - break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg2(ptr, mem) - return true - } - // match: (MOVWQSX x:(MOVQload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWQSXload [off] {sym} ptr mem) - for { - x := v_0 - if x.Op != OpAMD64MOVQload { - break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg2(ptr, mem) - return true - } - // match: (MOVWQSX (ANDLconst [c] x)) - // cond: c & 0x8000 == 0 - // result: (ANDLconst [c & 0x7fff] x) - for { - if v_0.Op != OpAMD64ANDLconst { - break - } - c := v_0.AuxInt - x := v_0.Args[0] - if !(c&0x8000 == 0) { - break - } - v.reset(OpAMD64ANDLconst) - v.AuxInt = c & 0x7fff - v.AddArg(x) - return true - } - // match: (MOVWQSX (MOVWQSX x)) - // result: (MOVWQSX x) - for { - if v_0.Op != OpAMD64MOVWQSX { - break - } - x := v_0.Args[0] - v.reset(OpAMD64MOVWQSX) - v.AddArg(x) - return true - } - // match: (MOVWQSX (MOVBQSX x)) - // result: (MOVBQSX x) - for { - if v_0.Op != OpAMD64MOVBQSX { - break - } - x := v_0.Args[0] - v.reset(OpAMD64MOVBQSX) - v.AddArg(x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVWQSXload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) - // result: (MOVWQSX x) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64MOVWstore { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - x := v_1.Args[1] - ptr2 := v_1.Args[0] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { - break - } - v.reset(OpAMD64MOVWQSX) - v.AddArg(x) - return true - } - // match: (MOVWQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVWQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVWQSXload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(base, mem) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVWQZX(v *Value) bool { - v_0 := v.Args[0] - b := v.Block - // match: (MOVWQZX x:(MOVWload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWload [off] {sym} ptr mem) - for { - x := v_0 - if x.Op != OpAMD64MOVWload { - break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := 
b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg2(ptr, mem) - return true - } - // match: (MOVWQZX x:(MOVLload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWload [off] {sym} ptr mem) - for { - x := v_0 - if x.Op != OpAMD64MOVLload { - break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg2(ptr, mem) - return true - } - // match: (MOVWQZX x:(MOVQload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWload [off] {sym} ptr mem) - for { - x := v_0 - if x.Op != OpAMD64MOVQload { - break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg2(ptr, mem) - return true - } - // match: (MOVWQZX x) - // cond: zeroUpper48Bits(x,3) - // result: x - for { - x := v_0 - if !(zeroUpper48Bits(x, 3)) { - break - } - v.copyOf(x) - return true - } - // match: (MOVWQZX x:(MOVWloadidx1 [off] {sym} ptr idx mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWloadidx1 [off] {sym} ptr idx mem) - for { - x := v_0 - if x.Op != OpAMD64MOVWloadidx1 { - break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[2] - ptr := x.Args[0] - idx := x.Args[1] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVWQZX x:(MOVWloadidx2 [off] {sym} ptr idx mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (MOVWloadidx2 [off] {sym} ptr idx mem) - for { - x := v_0 - if x.Op != OpAMD64MOVWloadidx2 { - break - } - off := x.AuxInt - sym := x.Aux - mem := x.Args[2] - ptr := x.Args[0] - idx := x.Args[1] - if !(x.Uses == 1 && clobber(x)) { - break - } - b = x.Block - v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx2, v.Type) - v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym - v0.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVWQZX (ANDLconst [c] x)) - // result: (ANDLconst [c & 0xffff] x) - for { - if v_0.Op != OpAMD64ANDLconst { - break - } - c := v_0.AuxInt - x := v_0.Args[0] - v.reset(OpAMD64ANDLconst) - v.AuxInt = c & 0xffff - v.AddArg(x) - return true - } - // match: (MOVWQZX (MOVWQZX x)) - // result: (MOVWQZX x) - for { - if v_0.Op != OpAMD64MOVWQZX { - break - } - x := v_0.Args[0] - v.reset(OpAMD64MOVWQZX) - v.AddArg(x) - return true - } - // match: (MOVWQZX (MOVBQZX x)) - // result: (MOVBQZX x) - for { - if v_0.Op != OpAMD64MOVBQZX { - break - } - x := v_0.Args[0] - v.reset(OpAMD64MOVBQZX) - v.AddArg(x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVWload(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - config := b.Func.Config - // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) - // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) - // result: (MOVWQZX x) - for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64MOVWstore { - break - } - off2 := v_1.AuxInt - sym2 := v_1.Aux - x := v_1.Args[1] - ptr2 := v_1.Args[0] - if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { - break - } - v.reset(OpAMD64MOVWQZX) - 
v.AddArg(x) - return true - } - // match: (MOVWload [off1] {sym} (ADDQconst [off2] ptr) mem) - // cond: is32Bit(off1+off2) - // result: (MOVWload [off1+off2] {sym} ptr mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { - break - } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1 + off2)) { - break - } - v.reset(OpAMD64MOVWload) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg2(ptr, mem) - return true - } - // match: (MOVWload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVWload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(base, mem) - return true - } - // match: (MOVWload [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVWloadidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ1 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVWloadidx1) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVWload [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVWloadidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ2 { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] - mem := v_1 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { - break - } - v.reset(OpAMD64MOVWloadidx2) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) - return true - } - // match: (MOVWload [off] {sym} (ADDQ ptr idx) mem) - // cond: ptr.Op != OpSB - // result: (MOVWloadidx1 [off] {sym} ptr idx mem) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQ { - break - } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - mem := v_1 - if !(ptr.Op != OpSB) { - continue - } - v.reset(OpAMD64MOVWloadidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem) - // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) - // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) - for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAL { - break - } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - mem := v_1 - if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) { - break - } - v.reset(OpAMD64MOVWload) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg2(base, mem) - return true - } - // match: (MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem) - // cond: is32Bit(off1+off2) - // result: (MOVWload [off1+off2] {sym} ptr mem) - for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDLconst { - break - } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - mem := v_1 - if 
!(is32Bit(off1 + off2)) { - break - } - v.reset(OpAMD64MOVWload) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg2(ptr, mem) - return true - } - // match: (MOVWload [off] {sym} (SB) _) - // cond: symIsRO(sym) - // result: (MOVLconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))]) - for { - off := v.AuxInt - sym := v.Aux - if v_0.Op != OpSB || !(symIsRO(sym)) { - break - } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int64(read16(sym, off, config.ctxt.Arch.ByteOrder)) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64MOVWloadidx1(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVWloadidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) - // result: (MOVWloadidx2 [c] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 1 { - continue - } - idx := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVWloadidx2) - v.AuxInt = c - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVWloadidx1 [c] {sym} (ADDQconst [d] ptr) idx mem) - // cond: is32Bit(c+d) - // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64ADDQconst { - continue - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(is32Bit(c + d)) { - continue - } - v.reset(OpAMD64MOVWloadidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVWloadidx1 [c] {sym} ptr (ADDQconst [d] idx) mem) - // cond: is32Bit(c+d) - // result: (MOVWloadidx1 [c+d] {sym} ptr idx mem) - for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - continue - } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(is32Bit(c + d)) { - continue - } - v.reset(OpAMD64MOVWloadidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true - } - break - } - // match: (MOVWloadidx1 [i] {s} p (MOVQconst [c]) mem) - // cond: is32Bit(i+c) - // result: (MOVWload [i+c] {s} p mem) - for { - i := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - if v_1.Op != OpAMD64MOVQconst { - continue - } - c := v_1.AuxInt - mem := v_2 - if !(is32Bit(i + c)) { - continue - } - v.reset(OpAMD64MOVWload) - v.AuxInt = i + c - v.Aux = s - v.AddArg2(p, mem) - return true + off := v_0.AuxInt + sym2 := v_0.Aux + ptr := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) { + break } - break + v.reset(OpAMD64MOVQstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(ptr, mem) + return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVWloadidx2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVWloadidx2 [c] {sym} (ADDQconst [d] ptr) idx mem) - // cond: is32Bit(c+d) - // result: (MOVWloadidx2 [c+d] {sym} ptr idx mem) + // match: (MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [c2] {s} p0 mem)) + // cond: config.useSSE && x.Uses == 1 && same(p0, p1, 1) && ValAndOff(c2).Off() + 8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x) + // result: (MOVOstore [ValAndOff(c2).Off()] {s} p0 (MOVOconst [0]) mem) for { c := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { + s := v.Aux + p1 := v_0 + x := v_1 + if 
x.Op != OpAMD64MOVQstoreconst { break } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - if !(is32Bit(c + d)) { + c2 := x.AuxInt + if x.Aux != s { break } - v.reset(OpAMD64MOVWloadidx2) - v.AuxInt = c + d - v.Aux = sym - v.AddArg3(ptr, idx, mem) + mem := x.Args[1] + p0 := x.Args[0] + if !(config.useSSE && x.Uses == 1 && same(p0, p1, 1) && ValAndOff(c2).Off()+8 == ValAndOff(c).Off() && ValAndOff(c).Val() == 0 && ValAndOff(c2).Val() == 0 && clobber(x)) { + break + } + v.reset(OpAMD64MOVOstore) + v.AuxInt = ValAndOff(c2).Off() + v.Aux = s + v0 := b.NewValue0(x.Pos, OpAMD64MOVOconst, types.TypeInt128) + v0.AuxInt = 0 + v.AddArg3(p0, v0, mem) return true } - // match: (MOVWloadidx2 [c] {sym} ptr (ADDQconst [d] idx) mem) - // cond: is32Bit(c+2*d) - // result: (MOVWloadidx2 [c+2*d] {sym} ptr idx mem) + // match: (MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) + // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { + sc := v.AuxInt + sym1 := v.Aux + if v_0.Op != OpAMD64LEAL { break } - d := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(is32Bit(c + 2*d)) { + off := v_0.AuxInt + sym2 := v_0.Aux + ptr := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) { break } - v.reset(OpAMD64MOVWloadidx2) - v.AuxInt = c + 2*d - v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.reset(OpAMD64MOVQstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(ptr, mem) return true } - // match: (MOVWloadidx2 [i] {s} p (MOVQconst [c]) mem) - // cond: is32Bit(i+2*c) - // result: (MOVWload [i+2*c] {s} p mem) + // match: (MOVQstoreconst [sc] {s} (ADDLconst [off] ptr) mem) + // cond: ValAndOff(sc).canAdd(off) + // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) for { - i := v.AuxInt + sc := v.AuxInt s := v.Aux - p := v_0 - if v_1.Op != OpAMD64MOVQconst { + if v_0.Op != OpAMD64ADDLconst { break } - c := v_1.AuxInt - mem := v_2 - if !(is32Bit(i + 2*c)) { + off := v_0.AuxInt + ptr := v_0.Args[0] + mem := v_1 + if !(ValAndOff(sc).canAdd(off)) { break } - v.reset(OpAMD64MOVWload) - v.AuxInt = i + 2*c + v.reset(OpAMD64MOVQstoreconst) + v.AuxInt = ValAndOff(sc).add(off) v.Aux = s - v.AddArg2(p, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64MOVSDload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (MOVWstore [off] {sym} ptr (MOVWQSX x) mem) - // result: (MOVWstore [off] {sym} ptr x mem) + // match: (MOVSDload [off1] {sym} (ADDQconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) + // result: (MOVSDload [off1+off2] {sym} ptr mem) for { - off := v.AuxInt + off1 := v.AuxInt sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64MOVWQSX { + if v_0.Op != OpAMD64ADDQconst { break } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVWstore) - v.AuxInt = off + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64MOVSDload) + v.AuxInt = off1 + off2 v.Aux = sym - v.AddArg3(ptr, x, mem) + v.AddArg2(ptr, mem) return true } - // match: (MOVWstore [off] {sym} ptr (MOVWQZX x) mem) - // result: (MOVWstore [off] {sym} ptr x mem) + // match: (MOVSDload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && 
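// The [sc] AuxInt carried by the storeconst ops above is not a plain
// offset: it is a ValAndOff, packing the constant to be stored and the
// address offset into one int64. The MOVQstoreconst pair rule also leans
// on this, using Off() and Val() to prove the two stores are adjacent
// zero stores before fusing them into one 16-byte MOVOstore. A
// simplified, illustrative stand-in for the real ssa helpers:

type valAndOff int64 // sketch of ssa.ValAndOff: value in the high 32 bits, offset in the low 32

func (x valAndOff) Val() int64 { return int64(x) >> 32 }
func (x valAndOff) Off() int64 { return int64(int32(x)) }

// canAdd reports whether the offset half still fits in 32 bits after adding off.
func (x valAndOff) canAdd(off int64) bool {
	n := x.Off() + off
	return n == int64(int32(n))
}

// add returns the packed representation with off folded into the offset half.
func (x valAndOff) add(off int64) int64 {
	return x.Val()<<32 + int64(uint32(x.Off()+off))
}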
canMergeSym(sym1, sym2) + // result: (MOVSDload [off1+off2] {mergeSym(sym1,sym2)} base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != OpAMD64LEAQ { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVSDload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(base, mem) + return true + } + // match: (MOVSDload [off] {sym} ptr (MOVQstore [off] {sym} ptr val _)) + // result: (MOVQi2f val) for { off := v.AuxInt sym := v.Aux ptr := v_0 - if v_1.Op != OpAMD64MOVWQZX { + if v_1.Op != OpAMD64MOVQstore || v_1.AuxInt != off || v_1.Aux != sym { break } - x := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVWstore) - v.AuxInt = off - v.Aux = sym - v.AddArg3(ptr, x, mem) + val := v_1.Args[1] + if ptr != v_1.Args[0] { + break + } + v.reset(OpAMD64MOVQi2f) + v.AddArg(val) return true } - // match: (MOVWstore [off1] {sym} (ADDQconst [off2] ptr) val mem) + return false +} +func rewriteValueAMD64_OpAMD64MOVSDstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVSDstore [off1] {sym} (ADDQconst [off2] ptr) val mem) // cond: is32Bit(off1+off2) - // result: (MOVWstore [off1+off2] {sym} ptr val mem) + // result: (MOVSDstore [off1+off2] {sym} ptr val mem) for { off1 := v.AuxInt sym := v.Aux @@ -18459,57 +14464,81 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { if !(is32Bit(off1 + off2)) { break } - v.reset(OpAMD64MOVWstore) + v.reset(OpAMD64MOVSDstore) v.AuxInt = off1 + off2 v.Aux = sym v.AddArg3(ptr, val, mem) return true } - // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem) - // cond: validOff(off) - // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem) + // match: (MOVSDstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVSDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - off := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64MOVLconst { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != OpAMD64LEAQ { break } - c := v_1.AuxInt + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v_1 mem := v_2 - if !(validOff(off)) { + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVWstoreconst) - v.AuxInt = makeValAndOff(int64(int16(c)), off) - v.Aux = sym - v.AddArg2(ptr, mem) + v.reset(OpAMD64MOVSDstore) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(base, val, mem) return true } - // match: (MOVWstore [off] {sym} ptr (MOVQconst [c]) mem) - // cond: validOff(off) - // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem) + // match: (MOVSDstore [off] {sym} ptr (MOVQi2f val) mem) + // result: (MOVQstore [off] {sym} ptr val mem) for { off := v.AuxInt sym := v.Aux ptr := v_0 - if v_1.Op != OpAMD64MOVQconst { + if v_1.Op != OpAMD64MOVQi2f { break } - c := v_1.AuxInt + val := v_1.Args[0] mem := v_2 - if !(validOff(off)) { + v.reset(OpAMD64MOVQstore) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, val, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64MOVSSload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVSSload [off1] {sym} (ADDQconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) + // result: (MOVSSload [off1+off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + if v_0.Op != OpAMD64ADDQconst { break } - v.reset(OpAMD64MOVWstoreconst) - v.AuxInt = 
makeValAndOff(int64(int16(c)), off) + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64MOVSSload) + v.AuxInt = off1 + off2 v.Aux = sym v.AddArg2(ptr, mem) return true } - // match: (MOVWstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // match: (MOVSSload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // result: (MOVSSload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { off1 := v.AuxInt sym1 := v.Aux @@ -18519,1139 +14548,1201 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { off2 := v_0.AuxInt sym2 := v_0.Aux base := v_0.Args[0] - val := v_1 - mem := v_2 + mem := v_1 if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVWstore) + v.reset(OpAMD64MOVSSload) v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) - v.AddArg3(base, val, mem) + v.AddArg2(base, mem) + return true + } + // match: (MOVSSload [off] {sym} ptr (MOVLstore [off] {sym} ptr val _)) + // result: (MOVLi2f val) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != OpAMD64MOVLstore || v_1.AuxInt != off || v_1.Aux != sym { + break + } + val := v_1.Args[1] + if ptr != v_1.Args[0] { + break + } + v.reset(OpAMD64MOVLi2f) + v.AddArg(val) return true } - // match: (MOVWstore [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) val mem) - // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVWstoreidx1 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) + return false +} +func rewriteValueAMD64_OpAMD64MOVSSstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVSSstore [off1] {sym} (ADDQconst [off2] ptr) val mem) + // cond: is32Bit(off1+off2) + // result: (MOVSSstore [off1+off2] {sym} ptr val mem) for { off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ1 { + sym := v.Aux + if v_0.Op != OpAMD64ADDQconst { break } off2 := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] ptr := v_0.Args[0] val := v_1 mem := v_2 - if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { + if !(is32Bit(off1 + off2)) { break } - v.reset(OpAMD64MOVWstoreidx1) + v.reset(OpAMD64MOVSSstore) v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) + v.Aux = sym + v.AddArg3(ptr, val, mem) return true } - // match: (MOVWstore [off1] {sym1} (LEAQ2 [off2] {sym2} ptr idx) val mem) + // match: (MOVSSstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) - // result: (MOVWstoreidx2 [off1+off2] {mergeSym(sym1,sym2)} ptr idx val mem) + // result: (MOVSSstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { off1 := v.AuxInt sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ2 { + if v_0.Op != OpAMD64LEAQ { break } off2 := v_0.AuxInt sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] + base := v_0.Args[0] val := v_1 mem := v_2 if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVWstoreidx2) + v.reset(OpAMD64MOVSSstore) v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) - v.AddArg4(ptr, idx, val, mem) + v.AddArg3(base, val, mem) return true } - // match: (MOVWstore [off] {sym} (ADDQ ptr idx) val mem) - // cond: ptr.Op != OpSB - // result: (MOVWstoreidx1 [off] {sym} ptr idx val mem) + // match: (MOVSSstore [off] {sym} ptr (MOVLi2f val) mem) + // result: (MOVLstore [off] {sym} ptr val mem) for { off := v.AuxInt sym := v.Aux - if v_0.Op != OpAMD64ADDQ { + ptr 
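// The MOVLstore/MOVSSload pair above is store-to-load forwarding across
// register classes: a 32-bit integer stored and immediately reloaded as
// a float32 never needs to round-trip through memory, so the load is
// replaced by MOVLi2f, the direct GP-to-XMM bit move, and the store can
// then be removed if nothing else uses it.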
:= v_0 + if v_1.Op != OpAMD64MOVLi2f { break } - _ = v_0.Args[1] - v_0_0 := v_0.Args[0] - v_0_1 := v_0.Args[1] - for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { - ptr := v_0_0 - idx := v_0_1 - val := v_1 - mem := v_2 - if !(ptr.Op != OpSB) { - continue - } - v.reset(OpAMD64MOVWstoreidx1) - v.AuxInt = off - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true - } - break + val := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64MOVLstore) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, val, mem) + return true } - // match: (MOVWstore [i] {s} p (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p w mem)) + return false +} +func rewriteValueAMD64_OpAMD64MOVWQSX(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (MOVWQSX x:(MOVWload [off] {sym} ptr mem)) // cond: x.Uses == 1 && clobber(x) - // result: (MOVLstore [i-2] {s} p w mem) + // result: @x.Block (MOVWQSXload [off] {sym} ptr mem) for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64SHRLconst || v_1.AuxInt != 16 { + x := v_0 + if x.Op != OpAMD64MOVWload { break } - w := v_1.Args[0] - x := v_2 - if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + off := x.AuxInt + sym := x.Aux + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } - mem := x.Args[2] - if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type) + v.copyOf(v0) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg2(ptr, mem) + return true + } + // match: (MOVWQSX x:(MOVLload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVWQSXload [off] {sym} ptr mem) + for { + x := v_0 + if x.Op != OpAMD64MOVLload { break } - v.reset(OpAMD64MOVLstore) - v.AuxInt = i - 2 - v.Aux = s - v.AddArg3(p, w, mem) + off := x.AuxInt + sym := x.Aux + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { + break + } + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type) + v.copyOf(v0) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg2(ptr, mem) return true } - // match: (MOVWstore [i] {s} p (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p w mem)) + // match: (MOVWQSX x:(MOVQload [off] {sym} ptr mem)) // cond: x.Uses == 1 && clobber(x) - // result: (MOVLstore [i-2] {s} p w mem) + // result: @x.Block (MOVWQSXload [off] {sym} ptr mem) for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 16 { + x := v_0 + if x.Op != OpAMD64MOVQload { break } - w := v_1.Args[0] - x := v_2 - if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + off := x.AuxInt + sym := x.Aux + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } - mem := x.Args[2] - if p != x.Args[0] || w != x.Args[1] || !(x.Uses == 1 && clobber(x)) { + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVWQSXload, v.Type) + v.copyOf(v0) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg2(ptr, mem) + return true + } + // match: (MOVWQSX (ANDLconst [c] x)) + // cond: c & 0x8000 == 0 + // result: (ANDLconst [c & 0x7fff] x) + for { + if v_0.Op != OpAMD64ANDLconst { break } - v.reset(OpAMD64MOVLstore) - v.AuxInt = i - 2 - v.Aux = s - v.AddArg3(p, w, mem) + c := v_0.AuxInt + x := v_0.Args[0] + if !(c&0x8000 == 0) { + break + } + v.reset(OpAMD64ANDLconst) + v.AuxInt = c & 0x7fff + v.AddArg(x) return true } - // match: (MOVWstore [i] {s} p (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRLconst [j-16] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVLstore [i-2] {s} p w0 mem) + // 
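// The ANDLconst rule just above is sound because c&0x8000 == 0 forces
// bit 15 of the masked value to zero, so the low 16 bits are already
// non-negative and sign extension cannot change them; the MOVWQSX is
// absorbed by tightening the mask to 15 bits. A hypothetical self-check:

func sextEqualsMask(x, c int32) bool {
	m := x & c // caller guarantees c&0x8000 == 0, as the rule's cond does
	return int32(int16(m)) == m&0x7fff
}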
match: (MOVWQSX (MOVWQSX x)) + // result: (MOVWQSX x) for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64SHRLconst { + if v_0.Op != OpAMD64MOVWQSX { break } - j := v_1.AuxInt - w := v_1.Args[0] - x := v_2 - if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + x := v_0.Args[0] + v.reset(OpAMD64MOVWQSX) + v.AddArg(x) + return true + } + // match: (MOVWQSX (MOVBQSX x)) + // result: (MOVBQSX x) + for { + if v_0.Op != OpAMD64MOVBQSX { break } - mem := x.Args[2] - if p != x.Args[0] { + x := v_0.Args[0] + v.reset(OpAMD64MOVBQSX) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64MOVWQSXload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVWQSXload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) + // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // result: (MOVWQSX x) + for { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != OpAMD64MOVWstore { break } - w0 := x.Args[1] - if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + off2 := v_1.AuxInt + sym2 := v_1.Aux + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { break } - v.reset(OpAMD64MOVLstore) - v.AuxInt = i - 2 - v.Aux = s - v.AddArg3(p, w0, mem) + v.reset(OpAMD64MOVWQSX) + v.AddArg(x) return true } - // match: (MOVWstore [i] {s} p (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p w0:(SHRQconst [j-16] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVLstore [i-2] {s} p w0 mem) + // match: (MOVWQSXload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVWQSXload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64SHRQconst { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != OpAMD64LEAQ { break } - j := v_1.AuxInt - w := v_1.Args[0] - x := v_2 - if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v_1 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { break } - mem := x.Args[2] - if p != x.Args[0] { + v.reset(OpAMD64MOVWQSXload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(base, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64MOVWQZX(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (MOVWQZX x:(MOVWload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVWload [off] {sym} ptr mem) + for { + x := v_0 + if x.Op != OpAMD64MOVWload { break } - w0 := x.Args[1] - if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + off := x.AuxInt + sym := x.Aux + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } - v.reset(OpAMD64MOVLstore) - v.AuxInt = i - 2 - v.Aux = s - v.AddArg3(p, w0, mem) + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type) + v.copyOf(v0) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg2(ptr, mem) return true } - // match: (MOVWstore [i] {s} p x1:(MOVWload [j] {s2} p2 mem) mem2:(MOVWstore [i-2] {s} p x2:(MOVWload [j-2] {s2} p2 mem) mem)) - // cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2) - // result: (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem) + // match: (MOVWQZX x:(MOVLload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVWload [off] {sym} ptr mem) for { - i := 
v.AuxInt - s := v.Aux - p := v_0 - x1 := v_1 - if x1.Op != OpAMD64MOVWload { - break - } - j := x1.AuxInt - s2 := x1.Aux - mem := x1.Args[1] - p2 := x1.Args[0] - mem2 := v_2 - if mem2.Op != OpAMD64MOVWstore || mem2.AuxInt != i-2 || mem2.Aux != s { + x := v_0 + if x.Op != OpAMD64MOVLload { break } - _ = mem2.Args[2] - if p != mem2.Args[0] { + off := x.AuxInt + sym := x.Aux + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } - x2 := mem2.Args[1] - if x2.Op != OpAMD64MOVWload || x2.AuxInt != j-2 || x2.Aux != s2 { + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type) + v.copyOf(v0) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg2(ptr, mem) + return true + } + // match: (MOVWQZX x:(MOVQload [off] {sym} ptr mem)) + // cond: x.Uses == 1 && clobber(x) + // result: @x.Block (MOVWload [off] {sym} ptr mem) + for { + x := v_0 + if x.Op != OpAMD64MOVQload { break } - _ = x2.Args[1] - if p2 != x2.Args[0] || mem != x2.Args[1] || mem != mem2.Args[2] || !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)) { + off := x.AuxInt + sym := x.Aux + mem := x.Args[1] + ptr := x.Args[0] + if !(x.Uses == 1 && clobber(x)) { break } - v.reset(OpAMD64MOVLstore) - v.AuxInt = i - 2 - v.Aux = s - v0 := b.NewValue0(x2.Pos, OpAMD64MOVLload, typ.UInt32) - v0.AuxInt = j - 2 - v0.Aux = s2 - v0.AddArg2(p2, mem) - v.AddArg3(p, v0, mem) + b = x.Block + v0 := b.NewValue0(x.Pos, OpAMD64MOVWload, v.Type) + v.copyOf(v0) + v0.AuxInt = off + v0.Aux = sym + v0.AddArg2(ptr, mem) return true } - // match: (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) - // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) - // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + // match: (MOVWQZX x) + // cond: zeroUpper48Bits(x,3) + // result: x for { - off1 := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAL { + x := v_0 + if !(zeroUpper48Bits(x, 3)) { break } - off2 := v_0.AuxInt - sym2 := v_0.Aux - base := v_0.Args[0] - val := v_1 - mem := v_2 - if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) { + v.copyOf(x) + return true + } + // match: (MOVWQZX (ANDLconst [c] x)) + // result: (ANDLconst [c & 0xffff] x) + for { + if v_0.Op != OpAMD64ANDLconst { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = off1 + off2 - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(base, val, mem) + c := v_0.AuxInt + x := v_0.Args[0] + v.reset(OpAMD64ANDLconst) + v.AuxInt = c & 0xffff + v.AddArg(x) return true } - // match: (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem) - // cond: is32Bit(off1+off2) - // result: (MOVWstore [off1+off2] {sym} ptr val mem) + // match: (MOVWQZX (MOVWQZX x)) + // result: (MOVWQZX x) for { - off1 := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDLconst { + if v_0.Op != OpAMD64MOVWQZX { break } - off2 := v_0.AuxInt - ptr := v_0.Args[0] - val := v_1 - mem := v_2 - if !(is32Bit(off1 + off2)) { + x := v_0.Args[0] + v.reset(OpAMD64MOVWQZX) + v.AddArg(x) + return true + } + // match: (MOVWQZX (MOVBQZX x)) + // result: (MOVBQZX x) + for { + if v_0.Op != OpAMD64MOVBQZX { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = off1 + off2 - v.Aux = sym - v.AddArg3(ptr, val, mem) + x := v_0.Args[0] + v.reset(OpAMD64MOVBQZX) + v.AddArg(x) return true } return false } -func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVWload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (MOVWstoreconst [sc] {s} (ADDQconst [off] ptr) mem) - // cond: ValAndOff(sc).canAdd(off) - // result: (MOVWstoreconst 
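// zeroUpper48Bits(x, 3) in the (MOVWQZX x) => x rule reports whether x
// is computed by an op already known to leave bits 16..63 zero (a 16-bit
// load, for instance); the second argument bounds how far the helper
// will recurse through x's arguments. When it holds, the explicit
// zero-extension is a no-op and is dropped.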
[ValAndOff(sc).add(off)] {s} ptr mem) + b := v.Block + config := b.Func.Config + // match: (MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) + // cond: sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) + // result: (MOVWQZX x) for { - sc := v.AuxInt - s := v.Aux - if v_0.Op != OpAMD64ADDQconst { + off := v.AuxInt + sym := v.Aux + ptr := v_0 + if v_1.Op != OpAMD64MOVWstore { break } - off := v_0.AuxInt - ptr := v_0.Args[0] - mem := v_1 - if !(ValAndOff(sc).canAdd(off)) { + off2 := v_1.AuxInt + sym2 := v_1.Aux + x := v_1.Args[1] + ptr2 := v_1.Args[0] + if !(sym == sym2 && off == off2 && isSamePtr(ptr, ptr2)) { break } - v.reset(OpAMD64MOVWstoreconst) - v.AuxInt = ValAndOff(sc).add(off) - v.Aux = s - v.AddArg2(ptr, mem) + v.reset(OpAMD64MOVWQZX) + v.AddArg(x) return true } - // match: (MOVWstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) - // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) - // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) + // match: (MOVWload [off1] {sym} (ADDQconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) + // result: (MOVWload [off1+off2] {sym} ptr mem) for { - sc := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ { + off1 := v.AuxInt + sym := v.Aux + if v_0.Op != OpAMD64ADDQconst { break } - off := v_0.AuxInt - sym2 := v_0.Aux + off2 := v_0.AuxInt ptr := v_0.Args[0] mem := v_1 - if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) { + if !(is32Bit(off1 + off2)) { break } - v.reset(OpAMD64MOVWstoreconst) - v.AuxInt = ValAndOff(sc).add(off) - v.Aux = mergeSym(sym1, sym2) + v.reset(OpAMD64MOVWload) + v.AuxInt = off1 + off2 + v.Aux = sym v.AddArg2(ptr, mem) return true } - // match: (MOVWstoreconst [x] {sym1} (LEAQ1 [off] {sym2} ptr idx) mem) - // cond: canMergeSym(sym1, sym2) - // result: (MOVWstoreconstidx1 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) + // match: (MOVWload [off1] {sym1} (LEAQ [off2] {sym2} base) mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { - x := v.AuxInt + off1 := v.AuxInt sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ1 { + if v_0.Op != OpAMD64LEAQ { break } - off := v_0.AuxInt + off2 := v_0.AuxInt sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] + base := v_0.Args[0] mem := v_1 - if !(canMergeSym(sym1, sym2)) { + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { break } - v.reset(OpAMD64MOVWstoreconstidx1) - v.AuxInt = ValAndOff(x).add(off) + v.reset(OpAMD64MOVWload) + v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) + v.AddArg2(base, mem) return true } - // match: (MOVWstoreconst [x] {sym1} (LEAQ2 [off] {sym2} ptr idx) mem) - // cond: canMergeSym(sym1, sym2) - // result: (MOVWstoreconstidx2 [ValAndOff(x).add(off)] {mergeSym(sym1,sym2)} ptr idx mem) + // match: (MOVWload [i0] {s0} l:(LEAQ1 [i1] {s1} x y) mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVWload [i0+i1] {s0} (LEAQ1 [0] {s1} x y) mem) for { - x := v.AuxInt - sym1 := v.Aux - if v_0.Op != OpAMD64LEAQ2 { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ1 { break } - off := v_0.AuxInt - sym2 := v_0.Aux - idx := v_0.Args[1] - ptr := v_0.Args[0] + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] mem := v_1 - if !(canMergeSym(sym1, sym2)) { + if !(i1 != 0 && is32Bit(i0+i1)) { break } - v.reset(OpAMD64MOVWstoreconstidx2) - v.AuxInt = ValAndOff(x).add(off) - v.Aux = mergeSym(sym1, sym2) - v.AddArg3(ptr, idx, mem) + v.reset(OpAMD64MOVWload) + 
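// The arithmetic in the MOVWstoreconst merging rules here follows x86's
// little-endian layout: the constant stored at the lower offset (a)
// becomes the low half of the fused 32-bit constant, the one stored
// 2 bytes above it (c) becomes the high half, and the result is written
// back at a's offset. A hypothetical standalone rendering of the fusion:

func fuseWordConsts(aVal, cVal int64) int64 {
	return aVal&0xffff | cVal<<16 // low half at the lower address
}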
v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg2(v0, mem) return true } - // match: (MOVWstoreconst [x] {sym} (ADDQ ptr idx) mem) - // result: (MOVWstoreconstidx1 [x] {sym} ptr idx mem) + // match: (MOVWload [i0] {s0} l:(LEAQ2 [i1] {s1} x y) mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVWload [i0+i1] {s0} (LEAQ2 [0] {s1} x y) mem) for { - x := v.AuxInt - sym := v.Aux - if v_0.Op != OpAMD64ADDQ { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ2 { break } - idx := v_0.Args[1] - ptr := v_0.Args[0] + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] mem := v_1 - v.reset(OpAMD64MOVWstoreconstidx1) - v.AuxInt = x - v.Aux = sym - v.AddArg3(ptr, idx, mem) + if !(i1 != 0 && is32Bit(i0+i1)) { + break + } + v.reset(OpAMD64MOVWload) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg2(v0, mem) return true } - // match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) - // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) + // match: (MOVWload [i0] {s0} l:(LEAQ4 [i1] {s1} x y) mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVWload [i0+i1] {s0} (LEAQ4 [0] {s1} x y) mem) for { - c := v.AuxInt - s := v.Aux - p := v_0 - x := v_1 - if x.Op != OpAMD64MOVWstoreconst { - break - } - a := x.AuxInt - if x.Aux != s { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ4 { break } - mem := x.Args[1] - if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + mem := v_1 + if !(i1 != 0 && is32Bit(i0+i1)) { break } - v.reset(OpAMD64MOVLstoreconst) - v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) - v.Aux = s - v.AddArg2(p, mem) + v.reset(OpAMD64MOVWload) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg2(v0, mem) return true } - // match: (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) - // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p mem) + // match: (MOVWload [i0] {s0} l:(LEAQ8 [i1] {s1} x y) mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVWload [i0+i1] {s0} (LEAQ8 [0] {s1} x y) mem) for { - a := v.AuxInt - s := v.Aux - p := v_0 - x := v_1 - if x.Op != OpAMD64MOVWstoreconst { - break - } - c := x.AuxInt - if x.Aux != s { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ8 { break } - mem := x.Args[1] - if p != x.Args[0] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + mem := v_1 + if !(i1 != 0 && is32Bit(i0+i1)) { break } - v.reset(OpAMD64MOVLstoreconst) - v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) - v.Aux = s - v.AddArg2(p, mem) + v.reset(OpAMD64MOVWload) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg2(v0, mem) return true 
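// Note the shape of the new LEAQx rules above: with a nonzero i1 they do
// not select an indexed load directly, they normalize instead, folding
// the entire displacement into the load's AuxInt and leaving a
// zero-offset LEAQx behind:
//
//	MOVWload [i0] (LEAQ2 [i1] x y)  =>  MOVWload [i0+i1] (LEAQ2 [0] x y)
//
// With the offset concentrated on the load, a zero-offset LEAQx is all a
// later combining pass has to recognize to form the indexed addressing
// mode.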
} - // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) - // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) - // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) + // match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem) + // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) + // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) for { - sc := v.AuxInt + off1 := v.AuxInt sym1 := v.Aux if v_0.Op != OpAMD64LEAL { break } - off := v_0.AuxInt + off2 := v_0.AuxInt sym2 := v_0.Aux - ptr := v_0.Args[0] + base := v_0.Args[0] mem := v_1 - if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) { + if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) { break } - v.reset(OpAMD64MOVWstoreconst) - v.AuxInt = ValAndOff(sc).add(off) + v.reset(OpAMD64MOVWload) + v.AuxInt = off1 + off2 v.Aux = mergeSym(sym1, sym2) - v.AddArg2(ptr, mem) + v.AddArg2(base, mem) return true } - // match: (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem) - // cond: ValAndOff(sc).canAdd(off) - // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) + // match: (MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) + // result: (MOVWload [off1+off2] {sym} ptr mem) for { - sc := v.AuxInt - s := v.Aux + off1 := v.AuxInt + sym := v.Aux if v_0.Op != OpAMD64ADDLconst { break } - off := v_0.AuxInt + off2 := v_0.AuxInt ptr := v_0.Args[0] mem := v_1 - if !(ValAndOff(sc).canAdd(off)) { + if !(is32Bit(off1 + off2)) { break } - v.reset(OpAMD64MOVWstoreconst) - v.AuxInt = ValAndOff(sc).add(off) - v.Aux = s + v.reset(OpAMD64MOVWload) + v.AuxInt = off1 + off2 + v.Aux = sym v.AddArg2(ptr, mem) return true } + // match: (MOVWload [off] {sym} (SB) _) + // cond: symIsRO(sym) + // result: (MOVLconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))]) + for { + off := v.AuxInt + sym := v.Aux + if v_0.Op != OpSB || !(symIsRO(sym)) { + break + } + v.reset(OpAMD64MOVLconst) + v.AuxInt = int64(read16(sym, off, config.ctxt.Arch.ByteOrder)) + return true + } return false } -func rewriteValueAMD64_OpAMD64MOVWstoreconstidx1(v *Value) bool { +func rewriteValueAMD64_OpAMD64MOVWstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (MOVWstoreconstidx1 [c] {sym} ptr (SHLQconst [1] idx) mem) - // result: (MOVWstoreconstidx2 [c] {sym} ptr idx mem) + b := v.Block + typ := &b.Func.Config.Types + // match: (MOVWstore [off] {sym} ptr (MOVWQSX x) mem) + // result: (MOVWstore [off] {sym} ptr x mem) for { - c := v.AuxInt + off := v.AuxInt sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 1 { - continue - } - idx := v_1.Args[0] - mem := v_2 - v.reset(OpAMD64MOVWstoreconstidx2) - v.AuxInt = c - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true + ptr := v_0 + if v_1.Op != OpAMD64MOVWQSX { + break } - break + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64MOVWstore) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, x, mem) + return true } - // match: (MOVWstoreconstidx1 [x] {sym} (ADDQconst [c] ptr) idx mem) - // cond: ValAndOff(x).canAdd(c) - // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) + // match: (MOVWstore [off] {sym} ptr (MOVWQZX x) mem) + // result: (MOVWstore [off] {sym} ptr x mem) for { - x := v.AuxInt + off := v.AuxInt sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64ADDQconst { - continue - } - c := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - mem := v_2 - 
if !(ValAndOff(x).canAdd(c)) { - continue - } - v.reset(OpAMD64MOVWstoreconstidx1) - v.AuxInt = ValAndOff(x).add(c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true + ptr := v_0 + if v_1.Op != OpAMD64MOVWQZX { + break } - break + x := v_1.Args[0] + mem := v_2 + v.reset(OpAMD64MOVWstore) + v.AuxInt = off + v.Aux = sym + v.AddArg3(ptr, x, mem) + return true } - // match: (MOVWstoreconstidx1 [x] {sym} ptr (ADDQconst [c] idx) mem) - // cond: ValAndOff(x).canAdd(c) - // result: (MOVWstoreconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem) + // match: (MOVWstore [off1] {sym} (ADDQconst [off2] ptr) val mem) + // cond: is32Bit(off1+off2) + // result: (MOVWstore [off1+off2] {sym} ptr val mem) for { - x := v.AuxInt + off1 := v.AuxInt sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - continue - } - c := v_1.AuxInt - idx := v_1.Args[0] - mem := v_2 - if !(ValAndOff(x).canAdd(c)) { - continue - } - v.reset(OpAMD64MOVWstoreconstidx1) - v.AuxInt = ValAndOff(x).add(c) - v.Aux = sym - v.AddArg3(ptr, idx, mem) - return true + if v_0.Op != OpAMD64ADDQconst { + break } - break - } - // match: (MOVWstoreconstidx1 [c] {s} p i x:(MOVWstoreconstidx1 [a] {s} p i mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) - // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p i mem) - for { - c := v.AuxInt - s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - i := v_1 - x := v_2 - if x.Op != OpAMD64MOVWstoreconstidx1 { - continue - } - a := x.AuxInt - if x.Aux != s { - continue - } - mem := x.Args[2] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || i != x_1 || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { - continue - } - v.reset(OpAMD64MOVLstoreconstidx1) - v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) - v.Aux = s - v.AddArg3(p, i, mem) - return true - } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1 + off2)) { + break } - break + v.reset(OpAMD64MOVWstore) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg3(ptr, val, mem) + return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVWstoreconstidx2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MOVWstoreconstidx2 [x] {sym} (ADDQconst [c] ptr) idx mem) - // cond: ValAndOff(x).canAdd(c) - // result: (MOVWstoreconstidx2 [ValAndOff(x).add(c)] {sym} ptr idx mem) + // match: (MOVWstore [off] {sym} ptr (MOVLconst [c]) mem) + // cond: validOff(off) + // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem) for { - x := v.AuxInt + off := v.AuxInt sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { + ptr := v_0 + if v_1.Op != OpAMD64MOVLconst { break } - c := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 + c := v_1.AuxInt mem := v_2 - if !(ValAndOff(x).canAdd(c)) { + if !(validOff(off)) { break } - v.reset(OpAMD64MOVWstoreconstidx2) - v.AuxInt = ValAndOff(x).add(c) + v.reset(OpAMD64MOVWstoreconst) + v.AuxInt = makeValAndOff(int64(int16(c)), off) v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.AddArg2(ptr, mem) return true } - // match: (MOVWstoreconstidx2 [x] {sym} ptr (ADDQconst [c] idx) mem) - // cond: ValAndOff(x).canAdd(2*c) - // result: (MOVWstoreconstidx2 [ValAndOff(x).add(2*c)] {sym} ptr idx mem) + // 
match: (MOVWstore [off] {sym} ptr (MOVQconst [c]) mem) + // cond: validOff(off) + // result: (MOVWstoreconst [makeValAndOff(int64(int16(c)),off)] {sym} ptr mem) for { - x := v.AuxInt + off := v.AuxInt sym := v.Aux ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { + if v_1.Op != OpAMD64MOVQconst { break } c := v_1.AuxInt - idx := v_1.Args[0] mem := v_2 - if !(ValAndOff(x).canAdd(2 * c)) { + if !(validOff(off)) { break } - v.reset(OpAMD64MOVWstoreconstidx2) - v.AuxInt = ValAndOff(x).add(2 * c) + v.reset(OpAMD64MOVWstoreconst) + v.AuxInt = makeValAndOff(int64(int16(c)), off) v.Aux = sym - v.AddArg3(ptr, idx, mem) + v.AddArg2(ptr, mem) return true } - // match: (MOVWstoreconstidx2 [c] {s} p i x:(MOVWstoreconstidx2 [a] {s} p i mem)) - // cond: x.Uses == 1 && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) - // result: (MOVLstoreconstidx1 [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p (SHLQconst [1] i) mem) + // match: (MOVWstore [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) + // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2) + // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - c := v.AuxInt - s := v.Aux - p := v_0 - i := v_1 - x := v_2 - if x.Op != OpAMD64MOVWstoreconstidx2 { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != OpAMD64LEAQ { break } - a := x.AuxInt - if x.Aux != s { + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) { break } - mem := x.Args[2] - if p != x.Args[0] || i != x.Args[1] || !(x.Uses == 1 && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { + v.reset(OpAMD64MOVWstore) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(base, val, mem) + return true + } + // match: (MOVWstore [i0] {s0} l:(LEAQ1 [i1] {s1} x y) val mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVWstore [i0+i1] {s0} (LEAQ1 [0] {s1} x y) val mem) + for { + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ1 { break } - v.reset(OpAMD64MOVLstoreconstidx1) - v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) - v.Aux = s - v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, i.Type) - v0.AuxInt = 1 - v0.AddArg(i) - v.AddArg3(p, v0, mem) + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + val := v_1 + mem := v_2 + if !(i1 != 0 && is32Bit(i0+i1)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ1, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg3(v0, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVWstoreidx1(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (MOVWstoreidx1 [c] {sym} ptr (SHLQconst [1] idx) val mem) - // result: (MOVWstoreidx2 [c] {sym} ptr idx val mem) + // match: (MOVWstore [i0] {s0} l:(LEAQ2 [i1] {s1} x y) val mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVWstore [i0+i1] {s0} (LEAQ2 [0] {s1} x y) val mem) for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64SHLQconst || v_1.AuxInt != 1 { - continue - } - idx := v_1.Args[0] - val := v_2 - mem := v_3 - v.reset(OpAMD64MOVWstoreidx2) - v.AuxInt = c - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ2 { + break } - break + i1 := l.AuxInt + s1 := l.Aux + 
y := l.Args[1] + x := l.Args[0] + val := v_1 + mem := v_2 + if !(i1 != 0 && is32Bit(i0+i1)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ2, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg3(v0, val, mem) + return true } - // match: (MOVWstoreidx1 [c] {sym} (ADDQconst [d] ptr) idx val mem) - // cond: is32Bit(c+d) - // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem) + // match: (MOVWstore [i0] {s0} l:(LEAQ4 [i1] {s1} x y) val mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVWstore [i0+i1] {s0} (LEAQ4 [0] {s1} x y) val mem) for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64ADDQconst { - continue - } - d := v_0.AuxInt - ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { - continue - } - v.reset(OpAMD64MOVWstoreidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ4 { + break } - break + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + val := v_1 + mem := v_2 + if !(i1 != 0 && is32Bit(i0+i1)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ4, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg3(v0, val, mem) + return true } - // match: (MOVWstoreidx1 [c] {sym} ptr (ADDQconst [d] idx) val mem) - // cond: is32Bit(c+d) - // result: (MOVWstoreidx1 [c+d] {sym} ptr idx val mem) + // match: (MOVWstore [i0] {s0} l:(LEAQ8 [i1] {s1} x y) val mem) + // cond: i1 != 0 && is32Bit(i0+i1) + // result: (MOVWstore [i0+i1] {s0} (LEAQ8 [0] {s1} x y) val mem) for { - c := v.AuxInt - sym := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { - continue - } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { - continue - } - v.reset(OpAMD64MOVWstoreidx1) - v.AuxInt = c + d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) - return true + i0 := v.AuxInt + s0 := v.Aux + l := v_0 + if l.Op != OpAMD64LEAQ8 { + break } - break + i1 := l.AuxInt + s1 := l.Aux + y := l.Args[1] + x := l.Args[0] + val := v_1 + mem := v_2 + if !(i1 != 0 && is32Bit(i0+i1)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = i0 + i1 + v.Aux = s0 + v0 := b.NewValue0(v.Pos, OpAMD64LEAQ8, l.Type) + v0.AuxInt = 0 + v0.Aux = s1 + v0.AddArg2(x, y) + v.AddArg3(v0, val, mem) + return true } - // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVLstoreidx1 [i-2] {s} p idx w mem) + // match: (MOVWstore [i] {s} p1 (SHRLconst [16] w) x:(MOVWstore [i-2] {s} p0 w mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // result: (MOVLstore [i-2] {s} p0 w mem) for { i := v.AuxInt s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - if v_2.Op != OpAMD64SHRLconst || v_2.AuxInt != 16 { - continue - } - w := v_2.Args[0] - x := v_3 - if x.Op != OpAMD64MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(OpAMD64MOVLstoreidx1) - v.AuxInt = i - 2 - v.Aux = s - v.AddArg4(p, idx, w, mem) - return true - } + 
p1 := v_0 + if v_1.Op != OpAMD64SHRLconst || v_1.AuxInt != 16 { + break } - break + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + break + } + mem := x.Args[2] + p0 := x.Args[0] + if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg3(p0, w, mem) + return true } - // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx1 [i-2] {s} p idx w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVLstoreidx1 [i-2] {s} p idx w mem) + // match: (MOVWstore [i] {s} p1 (SHRQconst [16] w) x:(MOVWstore [i-2] {s} p0 w mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // result: (MOVLstore [i-2] {s} p0 w mem) for { i := v.AuxInt s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - if v_2.Op != OpAMD64SHRQconst || v_2.AuxInt != 16 { - continue - } - w := v_2.Args[0] - x := v_3 - if x.Op != OpAMD64MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(OpAMD64MOVLstoreidx1) - v.AuxInt = i - 2 - v.Aux = s - v.AddArg4(p, idx, w, mem) - return true - } + p1 := v_0 + if v_1.Op != OpAMD64SHRQconst || v_1.AuxInt != 16 { + break } - break + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + break + } + mem := x.Args[2] + p0 := x.Args[0] + if w != x.Args[1] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg3(p0, w, mem) + return true } - // match: (MOVWstoreidx1 [i] {s} p idx (SHRLconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRLconst [j-16] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem) + // match: (MOVWstore [i] {s} p1 (SHRLconst [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHRLconst [j-16] w) mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // result: (MOVLstore [i-2] {s} p0 w0 mem) for { i := v.AuxInt s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - if v_2.Op != OpAMD64SHRLconst { - continue - } - j := v_2.AuxInt - w := v_2.Args[0] - x := v_3 - if x.Op != OpAMD64MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 { - continue - } - w0 := x.Args[2] - if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(OpAMD64MOVLstoreidx1) - v.AuxInt = i - 2 - v.Aux = s - v.AddArg4(p, idx, w0, mem) - return true - } + p1 := v_0 + if v_1.Op != OpAMD64SHRLconst { + break } - break + j := v_1.AuxInt + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + break + } + mem := x.Args[2] + p0 := x.Args[0] + w0 := x.Args[1] + if w0.Op != OpAMD64SHRLconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg3(p0, w0, mem) + return true } - // match: (MOVWstoreidx1 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx1 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem)) - // 
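
[Where the deleted MOVWstoreidx1 rules demanded syntactically identical p/idx arguments, the rewritten MOVWstore rules bind two pointer values p0 and p1 and delegate the equality check to same(p0, p1, 1). The helper itself is defined elsewhere in this change; a minimal sketch of the shape such a check presumably takes, not a verbatim copy:

	// same reports whether x and y compute the same value, comparing
	// structurally to at most the given depth. The depth bound means it
	// can return false negatives, which only costs a missed optimization.
	func same(x, y *Value, depth int) bool {
		if x == y {
			return true
		}
		if depth <= 0 {
			return false
		}
		if x.Op != y.Op || x.Aux != y.Aux || x.AuxInt != y.AuxInt || len(x.Args) != len(y.Args) {
			return false
		}
		for i := range x.Args {
			if !same(x.Args[i], y.Args[i], depth-1) {
				return false
			}
		}
		return true
	}

With depth 1, two distinct LEAQx values with equal Aux/AuxInt and pointer-identical arguments still compare equal, which is exactly the situation the offset-splitting rules above set up.]
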
cond: x.Uses == 1 && clobber(x) - // result: (MOVLstoreidx1 [i-2] {s} p idx w0 mem) + // match: (MOVWstore [i] {s} p1 (SHRQconst [j] w) x:(MOVWstore [i-2] {s} p0 w0:(SHRQconst [j-16] w) mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && clobber(x) + // result: (MOVLstore [i-2] {s} p0 w0 mem) for { i := v.AuxInt s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - idx := v_1 - if v_2.Op != OpAMD64SHRQconst { - continue - } - j := v_2.AuxInt - w := v_2.Args[0] - x := v_3 - if x.Op != OpAMD64MOVWstoreidx1 || x.AuxInt != i-2 || x.Aux != s { - continue - } - mem := x.Args[3] - x_0 := x.Args[0] - x_1 := x.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { - if p != x_0 || idx != x_1 { - continue - } - w0 := x.Args[2] - if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { - continue - } - v.reset(OpAMD64MOVLstoreidx1) - v.AuxInt = i - 2 - v.Aux = s - v.AddArg4(p, idx, w0, mem) - return true - } + p1 := v_0 + if v_1.Op != OpAMD64SHRQconst { + break } - break + j := v_1.AuxInt + w := v_1.Args[0] + x := v_2 + if x.Op != OpAMD64MOVWstore || x.AuxInt != i-2 || x.Aux != s { + break + } + mem := x.Args[2] + p0 := x.Args[0] + w0 := x.Args[1] + if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && same(p0, p1, 1) && clobber(x)) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v.AddArg3(p0, w0, mem) + return true } - // match: (MOVWstoreidx1 [i] {s} p (MOVQconst [c]) w mem) - // cond: is32Bit(i+c) - // result: (MOVWstore [i+c] {s} p w mem) + // match: (MOVWstore [i] {s} p x1:(MOVWload [j] {s2} p2 mem) mem2:(MOVWstore [i-2] {s} p x2:(MOVWload [j-2] {s2} p2 mem) mem)) + // cond: x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2) + // result: (MOVLstore [i-2] {s} p (MOVLload [j-2] {s2} p2 mem) mem) for { i := v.AuxInt s := v.Aux - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - p := v_0 - if v_1.Op != OpAMD64MOVQconst { - continue - } - c := v_1.AuxInt - w := v_2 - mem := v_3 - if !(is32Bit(i + c)) { - continue - } - v.reset(OpAMD64MOVWstore) - v.AuxInt = i + c - v.Aux = s - v.AddArg3(p, w, mem) - return true + p := v_0 + x1 := v_1 + if x1.Op != OpAMD64MOVWload { + break } - break + j := x1.AuxInt + s2 := x1.Aux + mem := x1.Args[1] + p2 := x1.Args[0] + mem2 := v_2 + if mem2.Op != OpAMD64MOVWstore || mem2.AuxInt != i-2 || mem2.Aux != s { + break + } + _ = mem2.Args[2] + if p != mem2.Args[0] { + break + } + x2 := mem2.Args[1] + if x2.Op != OpAMD64MOVWload || x2.AuxInt != j-2 || x2.Aux != s2 { + break + } + _ = x2.Args[1] + if p2 != x2.Args[0] || mem != x2.Args[1] || mem != mem2.Args[2] || !(x1.Uses == 1 && x2.Uses == 1 && mem2.Uses == 1 && clobber(x1, x2, mem2)) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = i - 2 + v.Aux = s + v0 := b.NewValue0(x2.Pos, OpAMD64MOVLload, typ.UInt32) + v0.AuxInt = j - 2 + v0.Aux = s2 + v0.AddArg2(p2, mem) + v.AddArg3(p, v0, mem) + return true } - // match: (MOVWstoreidx1 [off] {s} ptr idx (MOVLconst [c]) mem) - // cond: validValAndOff(int64(int16(c)), off) - // result: (MOVWstoreconstidx1 [makeValAndOff(int64(int16(c)), off)] {s} ptr idx mem) + // match: (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) + // cond: canMergeSym(sym1, sym2) && is32Bit(off1+off2) + // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) for { - off := v.AuxInt - s := v.Aux - ptr := v_0 - idx := v_1 - if v_2.Op != OpAMD64MOVLconst { + off1 := v.AuxInt + sym1 := v.Aux + if v_0.Op != 
OpAMD64LEAL { break } - c := v_2.AuxInt - mem := v_3 - if !(validValAndOff(int64(int16(c)), off)) { + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v_1 + mem := v_2 + if !(canMergeSym(sym1, sym2) && is32Bit(off1+off2)) { break } - v.reset(OpAMD64MOVWstoreconstidx1) - v.AuxInt = makeValAndOff(int64(int16(c)), off) - v.Aux = s - v.AddArg3(ptr, idx, mem) + v.reset(OpAMD64MOVWstore) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg3(base, val, mem) return true } - return false -} -func rewriteValueAMD64_OpAMD64MOVWstoreidx2(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (MOVWstoreidx2 [c] {sym} (ADDQconst [d] ptr) idx val mem) - // cond: is32Bit(c+d) - // result: (MOVWstoreidx2 [c+d] {sym} ptr idx val mem) + // match: (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem) + // cond: is32Bit(off1+off2) + // result: (MOVWstore [off1+off2] {sym} ptr val mem) for { - c := v.AuxInt + off1 := v.AuxInt sym := v.Aux - if v_0.Op != OpAMD64ADDQconst { + if v_0.Op != OpAMD64ADDLconst { break } - d := v_0.AuxInt + off2 := v_0.AuxInt ptr := v_0.Args[0] - idx := v_1 - val := v_2 - mem := v_3 - if !(is32Bit(c + d)) { + val := v_1 + mem := v_2 + if !(is32Bit(off1 + off2)) { break } - v.reset(OpAMD64MOVWstoreidx2) - v.AuxInt = c + d + v.reset(OpAMD64MOVWstore) + v.AuxInt = off1 + off2 v.Aux = sym - v.AddArg4(ptr, idx, val, mem) + v.AddArg3(ptr, val, mem) return true } - // match: (MOVWstoreidx2 [c] {sym} ptr (ADDQconst [d] idx) val mem) - // cond: is32Bit(c+2*d) - // result: (MOVWstoreidx2 [c+2*d] {sym} ptr idx val mem) + return false +} +func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (MOVWstoreconst [sc] {s} (ADDQconst [off] ptr) mem) + // cond: ValAndOff(sc).canAdd(off) + // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) for { - c := v.AuxInt - sym := v.Aux - ptr := v_0 - if v_1.Op != OpAMD64ADDQconst { + sc := v.AuxInt + s := v.Aux + if v_0.Op != OpAMD64ADDQconst { break } - d := v_1.AuxInt - idx := v_1.Args[0] - val := v_2 - mem := v_3 - if !(is32Bit(c + 2*d)) { + off := v_0.AuxInt + ptr := v_0.Args[0] + mem := v_1 + if !(ValAndOff(sc).canAdd(off)) { break } - v.reset(OpAMD64MOVWstoreidx2) - v.AuxInt = c + 2*d - v.Aux = sym - v.AddArg4(ptr, idx, val, mem) + v.reset(OpAMD64MOVWstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = s + v.AddArg2(ptr, mem) return true } - // match: (MOVWstoreidx2 [i] {s} p idx (SHRLconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst [1] idx) w mem) + // match: (MOVWstoreconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) + // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) for { - i := v.AuxInt - s := v.Aux - p := v_0 - idx := v_1 - if v_2.Op != OpAMD64SHRLconst || v_2.AuxInt != 16 { - break - } - w := v_2.Args[0] - x := v_3 - if x.Op != OpAMD64MOVWstoreidx2 || x.AuxInt != i-2 || x.Aux != s { + sc := v.AuxInt + sym1 := v.Aux + if v_0.Op != OpAMD64LEAQ { break } - mem := x.Args[3] - if p != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) { + off := v_0.AuxInt + sym2 := v_0.Aux + ptr := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) { break } - v.reset(OpAMD64MOVLstoreidx1) - v.AuxInt = i - 2 - v.Aux = s - v0 := b.NewValue0(v.Pos, 
OpAMD64SHLQconst, idx.Type) - v0.AuxInt = 1 - v0.AddArg(idx) - v.AddArg4(p, v0, w, mem) + v.reset(OpAMD64MOVWstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(ptr, mem) return true } - // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [16] w) x:(MOVWstoreidx2 [i-2] {s} p idx w mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst [1] idx) w mem) + // match: (MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) + // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) for { - i := v.AuxInt + c := v.AuxInt s := v.Aux - p := v_0 - idx := v_1 - if v_2.Op != OpAMD64SHRQconst || v_2.AuxInt != 16 { + p1 := v_0 + x := v_1 + if x.Op != OpAMD64MOVWstoreconst { break } - w := v_2.Args[0] - x := v_3 - if x.Op != OpAMD64MOVWstoreidx2 || x.AuxInt != i-2 || x.Aux != s { + a := x.AuxInt + if x.Aux != s { break } - mem := x.Args[3] - if p != x.Args[0] || idx != x.Args[1] || w != x.Args[2] || !(x.Uses == 1 && clobber(x)) { + mem := x.Args[1] + p0 := x.Args[0] + if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { break } - v.reset(OpAMD64MOVLstoreidx1) - v.AuxInt = i - 2 + v.reset(OpAMD64MOVLstoreconst) + v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) v.Aux = s - v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type) - v0.AuxInt = 1 - v0.AddArg(idx) - v.AddArg4(p, v0, w, mem) + v.AddArg2(p0, mem) return true } - // match: (MOVWstoreidx2 [i] {s} p idx (SHRQconst [j] w) x:(MOVWstoreidx2 [i-2] {s} p idx w0:(SHRQconst [j-16] w) mem)) - // cond: x.Uses == 1 && clobber(x) - // result: (MOVLstoreidx1 [i-2] {s} p (SHLQconst [1] idx) w0 mem) + // match: (MOVWstoreconst [a] {s} p1 x:(MOVWstoreconst [c] {s} p0 mem)) + // cond: x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off() + 2 == ValAndOff(c).Off() && clobber(x) + // result: (MOVLstoreconst [makeValAndOff(ValAndOff(a).Val()&0xffff | ValAndOff(c).Val()<<16, ValAndOff(a).Off())] {s} p0 mem) for { - i := v.AuxInt + a := v.AuxInt s := v.Aux - p := v_0 - idx := v_1 - if v_2.Op != OpAMD64SHRQconst { - break - } - j := v_2.AuxInt - w := v_2.Args[0] - x := v_3 - if x.Op != OpAMD64MOVWstoreidx2 || x.AuxInt != i-2 || x.Aux != s { + p1 := v_0 + x := v_1 + if x.Op != OpAMD64MOVWstoreconst { break } - mem := x.Args[3] - if p != x.Args[0] || idx != x.Args[1] { + c := x.AuxInt + if x.Aux != s { break } - w0 := x.Args[2] - if w0.Op != OpAMD64SHRQconst || w0.AuxInt != j-16 || w != w0.Args[0] || !(x.Uses == 1 && clobber(x)) { + mem := x.Args[1] + p0 := x.Args[0] + if !(x.Uses == 1 && same(p0, p1, 1) && ValAndOff(a).Off()+2 == ValAndOff(c).Off() && clobber(x)) { break } - v.reset(OpAMD64MOVLstoreidx1) - v.AuxInt = i - 2 + v.reset(OpAMD64MOVLstoreconst) + v.AuxInt = makeValAndOff(ValAndOff(a).Val()&0xffff|ValAndOff(c).Val()<<16, ValAndOff(a).Off()) v.Aux = s - v0 := b.NewValue0(v.Pos, OpAMD64SHLQconst, idx.Type) - v0.AuxInt = 1 - v0.AddArg(idx) - v.AddArg4(p, v0, w0, mem) + v.AddArg2(p0, mem) return true } - // match: (MOVWstoreidx2 [i] {s} p (MOVQconst [c]) w mem) - // cond: is32Bit(i+2*c) - // result: (MOVWstore [i+2*c] {s} p w mem) + // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) + // result: (MOVWstoreconst [ValAndOff(sc).add(off)] 
{mergeSym(sym1, sym2)} ptr mem) for { - i := v.AuxInt - s := v.Aux - p := v_0 - if v_1.Op != OpAMD64MOVQconst { + sc := v.AuxInt + sym1 := v.Aux + if v_0.Op != OpAMD64LEAL { break } - c := v_1.AuxInt - w := v_2 - mem := v_3 - if !(is32Bit(i + 2*c)) { + off := v_0.AuxInt + sym2 := v_0.Aux + ptr := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) { break } - v.reset(OpAMD64MOVWstore) - v.AuxInt = i + 2*c - v.Aux = s - v.AddArg3(p, w, mem) + v.reset(OpAMD64MOVWstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = mergeSym(sym1, sym2) + v.AddArg2(ptr, mem) return true } - // match: (MOVWstoreidx2 [off] {s} ptr idx (MOVLconst [c]) mem) - // cond: validValAndOff(int64(int16(c)), off) - // result: (MOVWstoreconstidx2 [makeValAndOff(int64(int16(c)), off)] {s} ptr idx mem) + // match: (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem) + // cond: ValAndOff(sc).canAdd(off) + // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) for { - off := v.AuxInt + sc := v.AuxInt s := v.Aux - ptr := v_0 - idx := v_1 - if v_2.Op != OpAMD64MOVLconst { + if v_0.Op != OpAMD64ADDLconst { break } - c := v_2.AuxInt - mem := v_3 - if !(validValAndOff(int64(int16(c)), off)) { + off := v_0.AuxInt + ptr := v_0.Args[0] + mem := v_1 + if !(ValAndOff(sc).canAdd(off)) { break } - v.reset(OpAMD64MOVWstoreconstidx2) - v.AuxInt = makeValAndOff(int64(int16(c)), off) + v.reset(OpAMD64MOVWstoreconst) + v.AuxInt = ValAndOff(sc).add(off) v.Aux = s - v.AddArg3(ptr, idx, mem) + v.AddArg2(ptr, mem) return true } return false @@ -21672,9 +17763,9 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v.copyOf(x) return true } - // match: (ORL x0:(MOVBload [i0] {s} p mem) sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p mem))) - // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) + // match: (ORL x0:(MOVBload [i0] {s} p0 mem) sh:(SHLLconst [8] x1:(MOVBload [i1] {s} p1 mem))) + // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x0 := v_0 @@ -21684,7 +17775,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p := x0.Args[0] + p0 := x0.Args[0] sh := v_1 if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 { continue @@ -21698,7 +17789,8 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { continue } _ = x1.Args[1] - if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + p1 := x1.Args[0] + if mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -21706,14 +17798,14 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v.copyOf(v0) v0.AuxInt = i0 v0.Aux = s - v0.AddArg2(p, mem) + v0.AddArg2(p0, mem) return true } break } - // match: (ORL x0:(MOVWload [i0] {s} p mem) sh:(SHLLconst [16] x1:(MOVWload [i1] {s} p mem))) - // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem) + // match: (ORL x0:(MOVWload [i0] {s} p0 mem) sh:(SHLLconst [16] 
x1:(MOVWload [i1] {s} p1 mem))) + // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x0 := v_0 @@ -21723,7 +17815,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p := x0.Args[0] + p0 := x0.Args[0] sh := v_1 if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 { continue @@ -21737,7 +17829,8 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { continue } _ = x1.Args[1] - if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + p1 := x1.Args[0] + if mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -21745,14 +17838,14 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v.copyOf(v0) v0.AuxInt = i0 v0.Aux = s - v0.AddArg2(p, mem) + v0.AddArg2(p0, mem) return true } break } - // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) y)) - // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j0] (MOVWload [i0] {s} p mem)) y) + // match: (ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p0 mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p1 mem)) y)) + // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j0] (MOVWload [i0] {s} p0 mem)) y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { s1 := v_0 @@ -21767,7 +17860,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p := x1.Args[0] + p0 := x1.Args[0] or := v_1 if or.Op != OpAMD64ORL { continue @@ -21790,11 +17883,12 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { continue } _ = x0.Args[1] - if p != x0.Args[0] || mem != x0.Args[1] { + p1 := x0.Args[0] + if mem != x0.Args[1] { continue } y := or_1 - if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { continue } b = mergePoint(b, x0, x1, y) @@ -21805,7 +17899,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) v2.AuxInt = i0 v2.Aux = s - v2.AddArg2(p, mem) + v2.AddArg2(p0, mem) v1.AddArg(v2) v0.AddArg2(v1, y) return true @@ -21813,177 +17907,9 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { } break } - // match: (ORL x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem))) - // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // 
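
[These ORL rules are the little-endian load-combining patterns: two adjacent narrow loads, one shifted into position and OR'd with the other, collapse into a single wider load once same() proves both address the same base. An illustrative source function that produces this shape -- the name is mine; encoding/binary's little-endian reads compile to the same pattern:

	// Two adjacent byte loads merge into one MOVWload; the analogous
	// 16- and 32-bit pairs become MOVLload and MOVQload.
	func load16(b []byte) uint16 {
		return uint16(b[0]) | uint16(b[1])<<8
	}
]
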
result: @mergePoint(b,x0,x1) (MOVWloadidx1 [i0] {s} p idx mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x0 := v_0 - if x0.Op != OpAMD64MOVBloadidx1 { - continue - } - i0 := x0.AuxInt - s := x0.Aux - mem := x0.Args[2] - x0_0 := x0.Args[0] - x0_1 := x0.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 { - p := x0_0 - idx := x0_1 - sh := v_1 - if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 { - continue - } - x1 := sh.Args[0] - if x1.Op != OpAMD64MOVBloadidx1 { - continue - } - i1 := x1.AuxInt - if x1.Aux != s { - continue - } - _ = x1.Args[2] - x1_0 := x1.Args[0] - x1_1 := x1.Args[1] - for _i2 := 0; _i2 <= 1; _i2, x1_0, x1_1 = _i2+1, x1_1, x1_0 { - if p != x1_0 || idx != x1_1 || mem != x1.Args[2] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { - continue - } - b = mergePoint(b, x0, x1) - v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type) - v.copyOf(v0) - v0.AuxInt = i0 - v0.Aux = s - v0.AddArg3(p, idx, mem) - return true - } - } - } - break - } - // match: (ORL x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLLconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem))) - // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x0 := v_0 - if x0.Op != OpAMD64MOVWloadidx1 { - continue - } - i0 := x0.AuxInt - s := x0.Aux - mem := x0.Args[2] - x0_0 := x0.Args[0] - x0_1 := x0.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 { - p := x0_0 - idx := x0_1 - sh := v_1 - if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 { - continue - } - x1 := sh.Args[0] - if x1.Op != OpAMD64MOVWloadidx1 { - continue - } - i1 := x1.AuxInt - if x1.Aux != s { - continue - } - _ = x1.Args[2] - x1_0 := x1.Args[0] - x1_1 := x1.Args[1] - for _i2 := 0; _i2 <= 1; _i2, x1_0, x1_1 = _i2+1, x1_1, x1_0 { - if p != x1_0 || idx != x1_1 || mem != x1.Args[2] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { - continue - } - b = mergePoint(b, x0, x1) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, typ.UInt32) - v.copyOf(v0) - v0.AuxInt = i0 - v0.Aux = s - v0.AddArg3(p, idx, mem) - return true - } - } - } - break - } - // match: (ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y)) - // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - s1 := v_0 - if s1.Op != OpAMD64SHLLconst { - continue - } - j1 := s1.AuxInt - x1 := s1.Args[0] - if x1.Op != OpAMD64MOVBloadidx1 { - continue - } - i1 := x1.AuxInt - s := x1.Aux - mem := x1.Args[2] - x1_0 := x1.Args[0] - x1_1 := x1.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x1_0, x1_1 = _i1+1, x1_1, x1_0 { - p := x1_0 - idx := x1_1 - or := v_1 - if or.Op != OpAMD64ORL { - continue - } - _ = or.Args[1] - or_0 := or.Args[0] - or_1 := or.Args[1] - for _i2 := 0; _i2 <= 1; _i2, or_0, or_1 = _i2+1, or_1, or_0 { - s0 := or_0 - if s0.Op != OpAMD64SHLLconst { - continue - } - j0 := s0.AuxInt - x0 := s0.Args[0] - if x0.Op != OpAMD64MOVBloadidx1 { - 
continue - } - i0 := x0.AuxInt - if x0.Aux != s { - continue - } - _ = x0.Args[2] - x0_0 := x0.Args[0] - x0_1 := x0.Args[1] - for _i3 := 0; _i3 <= 1; _i3, x0_0, x0_1 = _i3+1, x0_1, x0_0 { - if p != x0_0 || idx != x0_1 || mem != x0.Args[2] { - continue - } - y := or_1 - if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { - continue - } - b = mergePoint(b, x0, x1, y) - v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type) - v.copyOf(v0) - v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type) - v1.AuxInt = j0 - v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, typ.UInt16) - v2.AuxInt = i0 - v2.Aux = s - v2.AddArg3(p, idx, mem) - v1.AddArg(v2) - v0.AddArg2(v1, y) - return true - } - } - } - } - break - } - // match: (ORL x1:(MOVBload [i1] {s} p mem) sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p mem))) - // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i0] {s} p mem)) + // match: (ORL x1:(MOVBload [i1] {s} p0 mem) sh:(SHLLconst [8] x0:(MOVBload [i0] {s} p1 mem))) + // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i0] {s} p0 mem)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x1 := v_0 @@ -21993,7 +17919,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p := x1.Args[0] + p0 := x1.Args[0] sh := v_1 if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 { continue @@ -22007,7 +17933,8 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { continue } _ = x0.Args[1] - if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + p1 := x0.Args[0] + if mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -22017,15 +17944,15 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) v1.AuxInt = i0 v1.Aux = s - v1.AddArg2(p, mem) + v1.AddArg2(p0, mem) v0.AddArg(v1) return true } break } - // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))) - // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - // result: @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i0] {s} p mem)) + // match: (ORL r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem)))) + // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) + // result: @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i0] {s} p0 mem)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { r1 := v_0 @@ -22039,7 +17966,7 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p := x1.Args[0] + p0 := x1.Args[0] sh := v_1 if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 { continue @@ -22057,199 +17984,25 @@ func 
rewriteValueAMD64_OpAMD64ORL(v *Value) bool { continue } _ = x0.Args[1] - if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { + p1 := x0.Args[0] + if mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { continue } b = mergePoint(b, x0, x1) v0 := b.NewValue0(x0.Pos, OpAMD64BSWAPL, v.Type) v.copyOf(v0) v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32) - v1.AuxInt = i0 - v1.Aux = s - v1.AddArg2(p, mem) - v0.AddArg(v1) - return true - } - break - } - // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p mem)) y)) - // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j1] (ROLWconst [8] (MOVWload [i0] {s} p mem))) y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - s0 := v_0 - if s0.Op != OpAMD64SHLLconst { - continue - } - j0 := s0.AuxInt - x0 := s0.Args[0] - if x0.Op != OpAMD64MOVBload { - continue - } - i0 := x0.AuxInt - s := x0.Aux - mem := x0.Args[1] - p := x0.Args[0] - or := v_1 - if or.Op != OpAMD64ORL { - continue - } - _ = or.Args[1] - or_0 := or.Args[0] - or_1 := or.Args[1] - for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 { - s1 := or_0 - if s1.Op != OpAMD64SHLLconst { - continue - } - j1 := s1.AuxInt - x1 := s1.Args[0] - if x1.Op != OpAMD64MOVBload { - continue - } - i1 := x1.AuxInt - if x1.Aux != s { - continue - } - _ = x1.Args[1] - if p != x1.Args[0] || mem != x1.Args[1] { - continue - } - y := or_1 - if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { - continue - } - b = mergePoint(b, x0, x1, y) - v0 := b.NewValue0(x1.Pos, OpAMD64ORL, v.Type) - v.copyOf(v0) - v1 := b.NewValue0(x1.Pos, OpAMD64SHLLconst, v.Type) - v1.AuxInt = j1 - v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16) - v2.AuxInt = 8 - v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16) - v3.AuxInt = i0 - v3.Aux = s - v3.AddArg2(p, mem) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(v1, y) - return true - } - } - break - } - // match: (ORL x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLLconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem))) - // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWloadidx1 [i0] {s} p idx mem)) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x1 := v_0 - if x1.Op != OpAMD64MOVBloadidx1 { - continue - } - i1 := x1.AuxInt - s := x1.Aux - mem := x1.Args[2] - x1_0 := x1.Args[0] - x1_1 := x1.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x1_0, x1_1 = _i1+1, x1_1, x1_0 { - p := x1_0 - idx := x1_1 - sh := v_1 - if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 8 { - continue - } - x0 := sh.Args[0] - if x0.Op != OpAMD64MOVBloadidx1 { - continue - } - i0 := x0.AuxInt - if x0.Aux != s { - continue - } - _ = x0.Args[2] - x0_0 := x0.Args[0] - x0_1 := x0.Args[1] - for _i2 := 0; _i2 <= 1; _i2, x0_0, x0_1 = _i2+1, x0_1, x0_0 { - if p != x0_0 || idx != x0_1 || mem 
!= x0.Args[2] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { - continue - } - b = mergePoint(b, x0, x1) - v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type) - v.copyOf(v0) - v0.AuxInt = 8 - v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, typ.UInt16) - v1.AuxInt = i0 - v1.Aux = s - v1.AddArg3(p, idx, mem) - v0.AddArg(v1) - return true - } - } - } - break - } - // match: (ORL r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLLconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem)))) - // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - // result: @mergePoint(b,x0,x1) (BSWAPL (MOVLloadidx1 [i0] {s} p idx mem)) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - r1 := v_0 - if r1.Op != OpAMD64ROLWconst || r1.AuxInt != 8 { - continue - } - x1 := r1.Args[0] - if x1.Op != OpAMD64MOVWloadidx1 { - continue - } - i1 := x1.AuxInt - s := x1.Aux - mem := x1.Args[2] - x1_0 := x1.Args[0] - x1_1 := x1.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x1_0, x1_1 = _i1+1, x1_1, x1_0 { - p := x1_0 - idx := x1_1 - sh := v_1 - if sh.Op != OpAMD64SHLLconst || sh.AuxInt != 16 { - continue - } - r0 := sh.Args[0] - if r0.Op != OpAMD64ROLWconst || r0.AuxInt != 8 { - continue - } - x0 := r0.Args[0] - if x0.Op != OpAMD64MOVWloadidx1 { - continue - } - i0 := x0.AuxInt - if x0.Aux != s { - continue - } - _ = x0.Args[2] - x0_0 := x0.Args[0] - x0_1 := x0.Args[1] - for _i2 := 0; _i2 <= 1; _i2, x0_0, x0_1 = _i2+1, x0_1, x0_0 { - if p != x0_0 || idx != x0_1 || mem != x0.Args[2] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { - continue - } - b = mergePoint(b, x0, x1) - v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type) - v.copyOf(v0) - v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, typ.UInt32) - v1.AuxInt = i0 - v1.Aux = s - v1.AddArg3(p, idx, mem) - v0.AddArg(v1) - return true - } - } + v1.AuxInt = i0 + v1.Aux = s + v1.AddArg2(p0, mem) + v0.AddArg(v1) + return true } break } - // match: (ORL s0:(SHLLconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y)) - // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j1] (ROLWconst [8] (MOVWloadidx1 [i0] {s} p idx mem))) y) + // match: (ORL s0:(SHLLconst [j0] x0:(MOVBload [i0] {s} p0 mem)) or:(ORL s1:(SHLLconst [j1] x1:(MOVBload [i1] {s} p1 mem)) y)) + // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORL (SHLLconst [j1] (ROLWconst [8] (MOVWload [i0] {s} p0 mem))) y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { s0 := v_0 @@ -22258,66 +18011,58 @@ func rewriteValueAMD64_OpAMD64ORL(v *Value) bool { } j0 := s0.AuxInt x0 := s0.Args[0] - if x0.Op != OpAMD64MOVBloadidx1 { + if x0.Op != OpAMD64MOVBload { continue } i0 := x0.AuxInt s := x0.Aux - mem := x0.Args[2] - x0_0 := x0.Args[0] - x0_1 := x0.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 { - p := x0_0 - idx := x0_1 - or := v_1 - 
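
[The blocks being deleted here are the ...loadidx1 variants of the same merges; under the new scheme only the plain-load forms survive, and indexed addressing is recovered afterwards by the addressingModes pass. The byte-reversed rules (the ROLWconst [8] and BSWAPL wrappers) handle big-endian reads: the loads still merge, with a byte swap layered on top. Illustrative source, hypothetical name:

	// Big-endian 16-bit read: merges into ROLWconst [8] (MOVWload ...);
	// the 32-bit version becomes BSWAPL (MOVLload ...).
	func load16be(b []byte) uint16 {
		return uint16(b[1]) | uint16(b[0])<<8
	}
]
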
if or.Op != OpAMD64ORL { + mem := x0.Args[1] + p0 := x0.Args[0] + or := v_1 + if or.Op != OpAMD64ORL { + continue + } + _ = or.Args[1] + or_0 := or.Args[0] + or_1 := or.Args[1] + for _i1 := 0; _i1 <= 1; _i1, or_0, or_1 = _i1+1, or_1, or_0 { + s1 := or_0 + if s1.Op != OpAMD64SHLLconst { continue } - _ = or.Args[1] - or_0 := or.Args[0] - or_1 := or.Args[1] - for _i2 := 0; _i2 <= 1; _i2, or_0, or_1 = _i2+1, or_1, or_0 { - s1 := or_0 - if s1.Op != OpAMD64SHLLconst { - continue - } - j1 := s1.AuxInt - x1 := s1.Args[0] - if x1.Op != OpAMD64MOVBloadidx1 { - continue - } - i1 := x1.AuxInt - if x1.Aux != s { - continue - } - _ = x1.Args[2] - x1_0 := x1.Args[0] - x1_1 := x1.Args[1] - for _i3 := 0; _i3 <= 1; _i3, x1_0, x1_1 = _i3+1, x1_1, x1_0 { - if p != x1_0 || idx != x1_1 || mem != x1.Args[2] { - continue - } - y := or_1 - if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { - continue - } - b = mergePoint(b, x0, x1, y) - v0 := b.NewValue0(v.Pos, OpAMD64ORL, v.Type) - v.copyOf(v0) - v1 := b.NewValue0(v.Pos, OpAMD64SHLLconst, v.Type) - v1.AuxInt = j1 - v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, typ.UInt16) - v2.AuxInt = 8 - v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, typ.UInt16) - v3.AuxInt = i0 - v3.Aux = s - v3.AddArg3(p, idx, mem) - v2.AddArg(v3) - v1.AddArg(v2) - v0.AddArg2(v1, y) - return true - } + j1 := s1.AuxInt + x1 := s1.Args[0] + if x1.Op != OpAMD64MOVBload { + continue + } + i1 := x1.AuxInt + if x1.Aux != s { + continue } + _ = x1.Args[1] + p1 := x1.Args[0] + if mem != x1.Args[1] { + continue + } + y := or_1 + if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + continue + } + b = mergePoint(b, x0, x1, y) + v0 := b.NewValue0(x1.Pos, OpAMD64ORL, v.Type) + v.copyOf(v0) + v1 := b.NewValue0(x1.Pos, OpAMD64SHLLconst, v.Type) + v1.AuxInt = j1 + v2 := b.NewValue0(x1.Pos, OpAMD64ROLWconst, typ.UInt16) + v2.AuxInt = 8 + v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16) + v3.AuxInt = i0 + v3.Aux = s + v3.AddArg2(p0, mem) + v2.AddArg(v3) + v1.AddArg(v2) + v0.AddArg2(v1, y) + return true } } break @@ -22935,9 +18680,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v.copyOf(x) return true } - // match: (ORQ x0:(MOVBload [i0] {s} p mem) sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p mem))) - // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p mem) + // match: (ORQ x0:(MOVBload [i0] {s} p0 mem) sh:(SHLQconst [8] x1:(MOVBload [i1] {s} p1 mem))) + // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVWload [i0] {s} p0 mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x0 := v_0 @@ -22947,7 +18692,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p := x0.Args[0] + p0 := x0.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 { continue @@ -22961,7 +18706,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x1.Args[1] - if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && 
clobber(x0, x1, sh)) { + p1 := x1.Args[0] + if mem != x1.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -22969,14 +18715,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v.copyOf(v0) v0.AuxInt = i0 v0.Aux = s - v0.AddArg2(p, mem) + v0.AddArg2(p0, mem) return true } break } - // match: (ORQ x0:(MOVWload [i0] {s} p mem) sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p mem))) - // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p mem) + // match: (ORQ x0:(MOVWload [i0] {s} p0 mem) sh:(SHLQconst [16] x1:(MOVWload [i1] {s} p1 mem))) + // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVLload [i0] {s} p0 mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x0 := v_0 @@ -22986,7 +18732,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p := x0.Args[0] + p0 := x0.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 { continue @@ -23000,7 +18746,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x1.Args[1] - if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + p1 := x1.Args[0] + if mem != x1.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -23008,14 +18755,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v.copyOf(v0) v0.AuxInt = i0 v0.Aux = s - v0.AddArg2(p, mem) + v0.AddArg2(p0, mem) return true } break } - // match: (ORQ x0:(MOVLload [i0] {s} p mem) sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p mem))) - // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p mem) + // match: (ORQ x0:(MOVLload [i0] {s} p0 mem) sh:(SHLQconst [32] x1:(MOVLload [i1] {s} p1 mem))) + // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (MOVQload [i0] {s} p0 mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x0 := v_0 @@ -23025,7 +18772,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i0 := x0.AuxInt s := x0.Aux mem := x0.Args[1] - p := x0.Args[0] + p0 := x0.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 { continue @@ -23039,7 +18786,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x1.Args[1] - if p != x1.Args[0] || mem != x1.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + p1 := x1.Args[0] + if mem != x1.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -23047,14 +18795,14 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v.copyOf(v0) v0.AuxInt = i0 v0.Aux = s - v0.AddArg2(p, mem) + v0.AddArg2(p0, mem) return true } break } - // match: (ORQ s1:(SHLQconst [j1] 
x1:(MOVBload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) y)) - // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVWload [i0] {s} p mem)) y) + // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p0 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p1 mem)) y)) + // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVWload [i0] {s} p0 mem)) y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { s1 := v_0 @@ -23069,7 +18817,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p := x1.Args[0] + p0 := x1.Args[0] or := v_1 if or.Op != OpAMD64ORQ { continue @@ -23092,11 +18840,12 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x0.Args[1] - if p != x0.Args[0] || mem != x0.Args[1] { + p1 := x0.Args[0] + if mem != x0.Args[1] { continue } y := or_1 - if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { + if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { continue } b = mergePoint(b, x0, x1, y) @@ -23107,7 +18856,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v2 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) v2.AuxInt = i0 v2.Aux = s - v2.AddArg2(p, mem) + v2.AddArg2(p0, mem) v1.AddArg(v2) v0.AddArg2(v1, y) return true @@ -23115,9 +18864,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { } break } - // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p mem)) y)) - // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVLload [i0] {s} p mem)) y) + // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWload [i1] {s} p0 mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWload [i0] {s} p1 mem)) y)) + // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) + // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVLload [i0] {s} p0 mem)) y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { s1 := v_0 @@ -23132,7 +18881,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p := x1.Args[0] + p0 := x1.Args[0] or := v_1 if or.Op != OpAMD64ORQ { continue @@ -23155,11 +18904,12 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x0.Args[1] - if p != x0.Args[0] || mem != x0.Args[1] { + p1 := x0.Args[0] + if mem != x0.Args[1] { continue } y := or_1 - if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) 
!= nil && clobber(x0, x1, s0, s1, or)) { + if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { continue } b = mergePoint(b, x0, x1, y) @@ -23170,7 +18920,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v2 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32) v2.AuxInt = i0 v2.Aux = s - v2.AddArg2(p, mem) + v2.AddArg2(p0, mem) v1.AddArg(v2) v0.AddArg2(v1, y) return true @@ -23178,297 +18928,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { } break } - // match: (ORQ x0:(MOVBloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [8] x1:(MOVBloadidx1 [i1] {s} p idx mem))) - // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (MOVWloadidx1 [i0] {s} p idx mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x0 := v_0 - if x0.Op != OpAMD64MOVBloadidx1 { - continue - } - i0 := x0.AuxInt - s := x0.Aux - mem := x0.Args[2] - x0_0 := x0.Args[0] - x0_1 := x0.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 { - p := x0_0 - idx := x0_1 - sh := v_1 - if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 { - continue - } - x1 := sh.Args[0] - if x1.Op != OpAMD64MOVBloadidx1 { - continue - } - i1 := x1.AuxInt - if x1.Aux != s { - continue - } - _ = x1.Args[2] - x1_0 := x1.Args[0] - x1_1 := x1.Args[1] - for _i2 := 0; _i2 <= 1; _i2, x1_0, x1_1 = _i2+1, x1_1, x1_0 { - if p != x1_0 || idx != x1_1 || mem != x1.Args[2] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { - continue - } - b = mergePoint(b, x0, x1) - v0 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, v.Type) - v.copyOf(v0) - v0.AuxInt = i0 - v0.Aux = s - v0.AddArg3(p, idx, mem) - return true - } - } - } - break - } - // match: (ORQ x0:(MOVWloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [16] x1:(MOVWloadidx1 [i1] {s} p idx mem))) - // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (MOVLloadidx1 [i0] {s} p idx mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x0 := v_0 - if x0.Op != OpAMD64MOVWloadidx1 { - continue - } - i0 := x0.AuxInt - s := x0.Aux - mem := x0.Args[2] - x0_0 := x0.Args[0] - x0_1 := x0.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 { - p := x0_0 - idx := x0_1 - sh := v_1 - if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 { - continue - } - x1 := sh.Args[0] - if x1.Op != OpAMD64MOVWloadidx1 { - continue - } - i1 := x1.AuxInt - if x1.Aux != s { - continue - } - _ = x1.Args[2] - x1_0 := x1.Args[0] - x1_1 := x1.Args[1] - for _i2 := 0; _i2 <= 1; _i2, x1_0, x1_1 = _i2+1, x1_1, x1_0 { - if p != x1_0 || idx != x1_1 || mem != x1.Args[2] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { - continue - } - b = mergePoint(b, x0, x1) - v0 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, typ.UInt32) - v.copyOf(v0) - v0.AuxInt = i0 - v0.Aux = s - v0.AddArg3(p, idx, mem) - return true - } - } - } - break - } - // match: (ORQ x0:(MOVLloadidx1 [i0] {s} p idx mem) sh:(SHLQconst [32] x1:(MOVLloadidx1 [i1] {s} p idx mem))) - // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) 
(MOVQloadidx1 [i0] {s} p idx mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x0 := v_0 - if x0.Op != OpAMD64MOVLloadidx1 { - continue - } - i0 := x0.AuxInt - s := x0.Aux - mem := x0.Args[2] - x0_0 := x0.Args[0] - x0_1 := x0.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 { - p := x0_0 - idx := x0_1 - sh := v_1 - if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 { - continue - } - x1 := sh.Args[0] - if x1.Op != OpAMD64MOVLloadidx1 { - continue - } - i1 := x1.AuxInt - if x1.Aux != s { - continue - } - _ = x1.Args[2] - x1_0 := x1.Args[0] - x1_1 := x1.Args[1] - for _i2 := 0; _i2 <= 1; _i2, x1_0, x1_1 = _i2+1, x1_1, x1_0 { - if p != x1_0 || idx != x1_1 || mem != x1.Args[2] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { - continue - } - b = mergePoint(b, x0, x1) - v0 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, typ.UInt64) - v.copyOf(v0) - v0.AuxInt = i0 - v0.Aux = s - v0.AddArg3(p, idx, mem) - return true - } - } - } - break - } - // match: (ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) y)) - // cond: i1 == i0+1 && j1 == j0+8 && j0 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVWloadidx1 [i0] {s} p idx mem)) y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - s1 := v_0 - if s1.Op != OpAMD64SHLQconst { - continue - } - j1 := s1.AuxInt - x1 := s1.Args[0] - if x1.Op != OpAMD64MOVBloadidx1 { - continue - } - i1 := x1.AuxInt - s := x1.Aux - mem := x1.Args[2] - x1_0 := x1.Args[0] - x1_1 := x1.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x1_0, x1_1 = _i1+1, x1_1, x1_0 { - p := x1_0 - idx := x1_1 - or := v_1 - if or.Op != OpAMD64ORQ { - continue - } - _ = or.Args[1] - or_0 := or.Args[0] - or_1 := or.Args[1] - for _i2 := 0; _i2 <= 1; _i2, or_0, or_1 = _i2+1, or_1, or_0 { - s0 := or_0 - if s0.Op != OpAMD64SHLQconst { - continue - } - j0 := s0.AuxInt - x0 := s0.Args[0] - if x0.Op != OpAMD64MOVBloadidx1 { - continue - } - i0 := x0.AuxInt - if x0.Aux != s { - continue - } - _ = x0.Args[2] - x0_0 := x0.Args[0] - x0_1 := x0.Args[1] - for _i3 := 0; _i3 <= 1; _i3, x0_0, x0_1 = _i3+1, x0_1, x0_0 { - if p != x0_0 || idx != x0_1 || mem != x0.Args[2] { - continue - } - y := or_1 - if !(i1 == i0+1 && j1 == j0+8 && j0%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { - continue - } - b = mergePoint(b, x0, x1, y) - v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type) - v.copyOf(v0) - v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type) - v1.AuxInt = j0 - v2 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, typ.UInt16) - v2.AuxInt = i0 - v2.Aux = s - v2.AddArg3(p, idx, mem) - v1.AddArg(v2) - v0.AddArg2(v1, y) - return true - } - } - } - } - break - } - // match: (ORQ s1:(SHLQconst [j1] x1:(MOVWloadidx1 [i1] {s} p idx mem)) or:(ORQ s0:(SHLQconst [j0] x0:(MOVWloadidx1 [i0] {s} p idx mem)) y)) - // cond: i1 == i0+2 && j1 == j0+16 && j0 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or) - // result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j0] (MOVLloadidx1 [i0] {s} p idx mem)) y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - s1 := v_0 - 
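
[The ORQ deletions mirror the ORL ones: every MOV{B,W,L}loadidx1-based merge goes away, leaving only the plain-pointer rules with same() conditions. At 64 bits the payoff is combining two adjacent 32-bit loads into one MOVQload. Illustrative source, hypothetical name:

	// lo and hi are adjacent little-endian 32-bit loads; the ORL rules
	// first form two MOVLloads, then the ORQ rule above merges the
	// whole expression into a single MOVQload.
	func load64(b []byte) uint64 {
		lo := uint64(uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24)
		hi := uint64(uint32(b[4]) | uint32(b[5])<<8 | uint32(b[6])<<16 | uint32(b[7])<<24)
		return lo | hi<<32
	}
]
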
if s1.Op != OpAMD64SHLQconst { - continue - } - j1 := s1.AuxInt - x1 := s1.Args[0] - if x1.Op != OpAMD64MOVWloadidx1 { - continue - } - i1 := x1.AuxInt - s := x1.Aux - mem := x1.Args[2] - x1_0 := x1.Args[0] - x1_1 := x1.Args[1] - for _i1 := 0; _i1 <= 1; _i1, x1_0, x1_1 = _i1+1, x1_1, x1_0 { - p := x1_0 - idx := x1_1 - or := v_1 - if or.Op != OpAMD64ORQ { - continue - } - _ = or.Args[1] - or_0 := or.Args[0] - or_1 := or.Args[1] - for _i2 := 0; _i2 <= 1; _i2, or_0, or_1 = _i2+1, or_1, or_0 { - s0 := or_0 - if s0.Op != OpAMD64SHLQconst { - continue - } - j0 := s0.AuxInt - x0 := s0.Args[0] - if x0.Op != OpAMD64MOVWloadidx1 { - continue - } - i0 := x0.AuxInt - if x0.Aux != s { - continue - } - _ = x0.Args[2] - x0_0 := x0.Args[0] - x0_1 := x0.Args[1] - for _i3 := 0; _i3 <= 1; _i3, x0_0, x0_1 = _i3+1, x0_1, x0_0 { - if p != x0_0 || idx != x0_1 || mem != x0.Args[2] { - continue - } - y := or_1 - if !(i1 == i0+2 && j1 == j0+16 && j0%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) { - continue - } - b = mergePoint(b, x0, x1, y) - v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type) - v.copyOf(v0) - v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type) - v1.AuxInt = j0 - v2 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, typ.UInt32) - v2.AuxInt = i0 - v2.Aux = s - v2.AddArg3(p, idx, mem) - v1.AddArg(v2) - v0.AddArg2(v1, y) - return true - } - } - } - } - break - } - // match: (ORQ x1:(MOVBload [i1] {s} p mem) sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p mem))) - // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) - // result: @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i0] {s} p mem)) + // match: (ORQ x1:(MOVBload [i1] {s} p0 mem) sh:(SHLQconst [8] x0:(MOVBload [i0] {s} p1 mem))) + // cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh) + // result: @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWload [i0] {s} p0 mem)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x1 := v_0 @@ -23478,7 +18940,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p := x1.Args[0] + p0 := x1.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 { continue @@ -23492,7 +18954,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x0.Args[1] - if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { + p1 := x0.Args[0] + if mem != x0.Args[1] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -23502,15 +18965,15 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v1 := b.NewValue0(x0.Pos, OpAMD64MOVWload, typ.UInt16) v1.AuxInt = i0 v1.Aux = s - v1.AddArg2(p, mem) + v1.AddArg2(p0, mem) v0.AddArg(v1) return true } break } - // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem)))) - // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - // result: @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i0] {s} p mem)) + // match: (ORQ r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p0 mem)) sh:(SHLQconst [16] 
r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p1 mem)))) + // cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) + // result: @mergePoint(b,x0,x1) (BSWAPL (MOVLload [i0] {s} p0 mem)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { r1 := v_0 @@ -23524,7 +18987,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p := x1.Args[0] + p0 := x1.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 { continue @@ -23542,7 +19005,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x0.Args[1] - if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { + p1 := x0.Args[0] + if mem != x0.Args[1] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -23551,15 +19015,15 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v1 := b.NewValue0(x0.Pos, OpAMD64MOVLload, typ.UInt32) v1.AuxInt = i0 v1.Aux = s - v1.AddArg2(p, mem) + v1.AddArg2(p0, mem) v0.AddArg(v1) return true } break } - // match: (ORQ r1:(BSWAPL x1:(MOVLload [i1] {s} p mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p mem)))) - // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) - // result: @mergePoint(b,x0,x1) (BSWAPQ (MOVQload [i0] {s} p mem)) + // match: (ORQ r1:(BSWAPL x1:(MOVLload [i1] {s} p0 mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLload [i0] {s} p1 mem)))) + // cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh) + // result: @mergePoint(b,x0,x1) (BSWAPQ (MOVQload [i0] {s} p0 mem)) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { r1 := v_0 @@ -23573,7 +19037,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { i1 := x1.AuxInt s := x1.Aux mem := x1.Args[1] - p := x1.Args[0] + p0 := x1.Args[0] sh := v_1 if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 { continue @@ -23591,7 +19055,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { continue } _ = x0.Args[1] - if p != x0.Args[0] || mem != x0.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { + p1 := x0.Args[0] + if mem != x0.Args[1] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) { continue } b = mergePoint(b, x0, x1) @@ -23600,15 +19065,15 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool { v1 := b.NewValue0(x0.Pos, OpAMD64MOVQload, typ.UInt64) v1.AuxInt = i0 v1.Aux = s - v1.AddArg2(p, mem) + v1.AddArg2(p0, mem) v0.AddArg(v1) return true } break } - // match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p mem)) y)) - // cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, 
-	// cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-	// result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (ROLWconst [8] (MOVWload [i0] {s} p mem))) y)
+	// match: (ORQ s0:(SHLQconst [j0] x0:(MOVBload [i0] {s} p0 mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBload [i1] {s} p1 mem)) y))
+	// cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
+	// result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (ROLWconst [8] (MOVWload [i0] {s} p0 mem))) y)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			s0 := v_0
@@ -23623,7 +19088,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
 			i0 := x0.AuxInt
 			s := x0.Aux
 			mem := x0.Args[1]
-			p := x0.Args[0]
+			p0 := x0.Args[0]
 			or := v_1
 			if or.Op != OpAMD64ORQ {
 				continue
@@ -23646,11 +19111,12 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
 					continue
 				}
 				_ = x1.Args[1]
-				if p != x1.Args[0] || mem != x1.Args[1] {
+				p1 := x1.Args[0]
+				if mem != x1.Args[1] {
 					continue
 				}
 				y := or_1
-				if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
+				if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
 					continue
 				}
 				b = mergePoint(b, x0, x1, y)
@@ -23663,7 +19129,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
 				v3 := b.NewValue0(x1.Pos, OpAMD64MOVWload, typ.UInt16)
 				v3.AuxInt = i0
 				v3.Aux = s
-				v3.AddArg2(p, mem)
+				v3.AddArg2(p0, mem)
 				v2.AddArg(v3)
 				v1.AddArg(v2)
 				v0.AddArg2(v1, y)
@@ -23672,9 +19138,9 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
 		}
 		break
 	}
-	// match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p mem))) y))
-	// cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)
-	// result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (BSWAPL (MOVLload [i0] {s} p mem))) y)
+	// match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWload [i0] {s} p0 mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWload [i1] {s} p1 mem))) y))
+	// cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)
+	// result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (BSWAPL (MOVLload [i0] {s} p0 mem))) y)
 	for {
 		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
 			s0 := v_0
@@ -23693,7 +19159,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
 			i0 := x0.AuxInt
 			s := x0.Aux
 			mem := x0.Args[1]
-			p := x0.Args[0]
+			p0 := x0.Args[0]
 			or := v_1
 			if or.Op != OpAMD64ORQ {
 				continue
@@ -23720,11 +19186,12 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
 					continue
 				}
 				_ = x1.Args[1]
-				if p != x1.Args[0] || mem != x1.Args[1] {
+				p1 := x1.Args[0]
+				if mem != x1.Args[1] {
 					continue
 				}
 				y := or_1
-				if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) {
+				if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && same(p0, p1, 1) && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) {
 					continue
 				}
 				b = mergePoint(b, x0, x1, y)
@@ -23736,7 +19203,7 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
 				v3 := b.NewValue0(x1.Pos, OpAMD64MOVLload, typ.UInt32)
 				v3.AuxInt = i0
 				v3.Aux = s
-				v3.AddArg2(p, mem)
+				v3.AddArg2(p0, mem)
 				v2.AddArg(v3)
 				v1.AddArg(v2)
 				v0.AddArg2(v1, y)
@@ -23745,330 +19212,6 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
 		}
 		break
 	}
-	// match: (ORQ x1:(MOVBloadidx1 [i1] {s} p idx mem) sh:(SHLQconst [8] x0:(MOVBloadidx1 [i0] {s} p idx mem)))
-	// cond: i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, sh)
-	// result: @mergePoint(b,x0,x1) (ROLWconst [8] (MOVWloadidx1 [i0] {s} p idx mem))
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			x1 := v_0
-			if x1.Op != OpAMD64MOVBloadidx1 {
-				continue
-			}
-			i1 := x1.AuxInt
-			s := x1.Aux
-			mem := x1.Args[2]
-			x1_0 := x1.Args[0]
-			x1_1 := x1.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x1_0, x1_1 = _i1+1, x1_1, x1_0 {
-				p := x1_0
-				idx := x1_1
-				sh := v_1
-				if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 8 {
-					continue
-				}
-				x0 := sh.Args[0]
-				if x0.Op != OpAMD64MOVBloadidx1 {
-					continue
-				}
-				i0 := x0.AuxInt
-				if x0.Aux != s {
-					continue
-				}
-				_ = x0.Args[2]
-				x0_0 := x0.Args[0]
-				x0_1 := x0.Args[1]
-				for _i2 := 0; _i2 <= 1; _i2, x0_0, x0_1 = _i2+1, x0_1, x0_0 {
-					if p != x0_0 || idx != x0_1 || mem != x0.Args[2] || !(i1 == i0+1 && x0.Uses == 1 && x1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, sh)) {
-						continue
-					}
-					b = mergePoint(b, x0, x1)
-					v0 := b.NewValue0(v.Pos, OpAMD64ROLWconst, v.Type)
-					v.copyOf(v0)
-					v0.AuxInt = 8
-					v1 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, typ.UInt16)
-					v1.AuxInt = i0
-					v1.Aux = s
-					v1.AddArg3(p, idx, mem)
-					v0.AddArg(v1)
-					return true
-				}
-			}
-		}
-		break
-	}
-	// match: (ORQ r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [16] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))))
-	// cond: i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
-	// result: @mergePoint(b,x0,x1) (BSWAPL (MOVLloadidx1 [i0] {s} p idx mem))
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			r1 := v_0
-			if r1.Op != OpAMD64ROLWconst || r1.AuxInt != 8 {
-				continue
-			}
-			x1 := r1.Args[0]
-			if x1.Op != OpAMD64MOVWloadidx1 {
-				continue
-			}
-			i1 := x1.AuxInt
-			s := x1.Aux
-			mem := x1.Args[2]
-			x1_0 := x1.Args[0]
-			x1_1 := x1.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x1_0, x1_1 = _i1+1, x1_1, x1_0 {
-				p := x1_0
-				idx := x1_1
-				sh := v_1
-				if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 16 {
-					continue
-				}
-				r0 := sh.Args[0]
-				if r0.Op != OpAMD64ROLWconst || r0.AuxInt != 8 {
-					continue
-				}
-				x0 := r0.Args[0]
-				if x0.Op != OpAMD64MOVWloadidx1 {
-					continue
-				}
-				i0 := x0.AuxInt
-				if x0.Aux != s {
-					continue
-				}
-				_ = x0.Args[2]
-				x0_0 := x0.Args[0]
-				x0_1 := x0.Args[1]
-				for _i2 := 0; _i2 <= 1; _i2, x0_0, x0_1 = _i2+1, x0_1, x0_0 {
-					if p != x0_0 || idx != x0_1 || mem != x0.Args[2] || !(i1 == i0+2 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
-						continue
-					}
-					b = mergePoint(b, x0, x1)
-					v0 := b.NewValue0(v.Pos, OpAMD64BSWAPL, v.Type)
-					v.copyOf(v0)
-					v1 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, typ.UInt32)
-					v1.AuxInt = i0
-					v1.Aux = s
-					v1.AddArg3(p, idx, mem)
-					v0.AddArg(v1)
-					return true
-				}
-			}
-		}
-		break
-	}
-	// match: (ORQ r1:(BSWAPL x1:(MOVLloadidx1 [i1] {s} p idx mem)) sh:(SHLQconst [32] r0:(BSWAPL x0:(MOVLloadidx1 [i0] {s} p idx mem))))
-	// cond: i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b,x0,x1) != nil && clobber(x0, x1, r0, r1, sh)
-	// result: @mergePoint(b,x0,x1) (BSWAPQ (MOVQloadidx1 [i0] {s} p idx mem))
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			r1 := v_0
-			if r1.Op != OpAMD64BSWAPL {
-				continue
-			}
-			x1 := r1.Args[0]
-			if x1.Op != OpAMD64MOVLloadidx1 {
-				continue
-			}
-			i1 := x1.AuxInt
-			s := x1.Aux
-			mem := x1.Args[2]
-			x1_0 := x1.Args[0]
-			x1_1 := x1.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x1_0, x1_1 = _i1+1, x1_1, x1_0 {
-				p := x1_0
-				idx := x1_1
-				sh := v_1
-				if sh.Op != OpAMD64SHLQconst || sh.AuxInt != 32 {
-					continue
-				}
-				r0 := sh.Args[0]
-				if r0.Op != OpAMD64BSWAPL {
-					continue
-				}
-				x0 := r0.Args[0]
-				if x0.Op != OpAMD64MOVLloadidx1 {
-					continue
-				}
-				i0 := x0.AuxInt
-				if x0.Aux != s {
-					continue
-				}
-				_ = x0.Args[2]
-				x0_0 := x0.Args[0]
-				x0_1 := x0.Args[1]
-				for _i2 := 0; _i2 <= 1; _i2, x0_0, x0_1 = _i2+1, x0_1, x0_0 {
-					if p != x0_0 || idx != x0_1 || mem != x0.Args[2] || !(i1 == i0+4 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && sh.Uses == 1 && mergePoint(b, x0, x1) != nil && clobber(x0, x1, r0, r1, sh)) {
-						continue
-					}
-					b = mergePoint(b, x0, x1)
-					v0 := b.NewValue0(v.Pos, OpAMD64BSWAPQ, v.Type)
-					v.copyOf(v0)
-					v1 := b.NewValue0(v.Pos, OpAMD64MOVQloadidx1, typ.UInt64)
-					v1.AuxInt = i0
-					v1.Aux = s
-					v1.AddArg3(p, idx, mem)
-					v0.AddArg(v1)
-					return true
-				}
-			}
-		}
-		break
-	}
-	// match: (ORQ s0:(SHLQconst [j0] x0:(MOVBloadidx1 [i0] {s} p idx mem)) or:(ORQ s1:(SHLQconst [j1] x1:(MOVBloadidx1 [i1] {s} p idx mem)) y))
-	// cond: i1 == i0+1 && j1 == j0-8 && j1 % 16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, s0, s1, or)
-	// result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (ROLWconst [8] (MOVWloadidx1 [i0] {s} p idx mem))) y)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			s0 := v_0
-			if s0.Op != OpAMD64SHLQconst {
-				continue
-			}
-			j0 := s0.AuxInt
-			x0 := s0.Args[0]
-			if x0.Op != OpAMD64MOVBloadidx1 {
-				continue
-			}
-			i0 := x0.AuxInt
-			s := x0.Aux
-			mem := x0.Args[2]
-			x0_0 := x0.Args[0]
-			x0_1 := x0.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 {
-				p := x0_0
-				idx := x0_1
-				or := v_1
-				if or.Op != OpAMD64ORQ {
-					continue
-				}
-				_ = or.Args[1]
-				or_0 := or.Args[0]
-				or_1 := or.Args[1]
-				for _i2 := 0; _i2 <= 1; _i2, or_0, or_1 = _i2+1, or_1, or_0 {
-					s1 := or_0
-					if s1.Op != OpAMD64SHLQconst {
-						continue
-					}
-					j1 := s1.AuxInt
-					x1 := s1.Args[0]
-					if x1.Op != OpAMD64MOVBloadidx1 {
-						continue
-					}
-					i1 := x1.AuxInt
-					if x1.Aux != s {
-						continue
-					}
-					_ = x1.Args[2]
-					x1_0 := x1.Args[0]
-					x1_1 := x1.Args[1]
-					for _i3 := 0; _i3 <= 1; _i3, x1_0, x1_1 = _i3+1, x1_1, x1_0 {
-						if p != x1_0 || idx != x1_1 || mem != x1.Args[2] {
-							continue
-						}
-						y := or_1
-						if !(i1 == i0+1 && j1 == j0-8 && j1%16 == 0 && x0.Uses == 1 && x1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, s0, s1, or)) {
-							continue
-						}
-						b = mergePoint(b, x0, x1, y)
-						v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-						v.copyOf(v0)
-						v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-						v1.AuxInt = j1
-						v2 := b.NewValue0(v.Pos, OpAMD64ROLWconst, typ.UInt16)
-						v2.AuxInt = 8
-						v3 := b.NewValue0(v.Pos, OpAMD64MOVWloadidx1, typ.UInt16)
-						v3.AuxInt = i0
-						v3.Aux = s
-						v3.AddArg3(p, idx, mem)
-						v2.AddArg(v3)
-						v1.AddArg(v2)
-						v0.AddArg2(v1, y)
-						return true
-					}
-				}
-			}
-		}
-		break
-	}
-	// match: (ORQ s0:(SHLQconst [j0] r0:(ROLWconst [8] x0:(MOVWloadidx1 [i0] {s} p idx mem))) or:(ORQ s1:(SHLQconst [j1] r1:(ROLWconst [8] x1:(MOVWloadidx1 [i1] {s} p idx mem))) y))
-	// cond: i1 == i0+2 && j1 == j0-16 && j1 % 32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b,x0,x1,y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)
-	// result: @mergePoint(b,x0,x1,y) (ORQ (SHLQconst [j1] (BSWAPL (MOVLloadidx1 [i0] {s} p idx mem))) y)
-	for {
-		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-			s0 := v_0
-			if s0.Op != OpAMD64SHLQconst {
-				continue
-			}
-			j0 := s0.AuxInt
-			r0 := s0.Args[0]
-			if r0.Op != OpAMD64ROLWconst || r0.AuxInt != 8 {
-				continue
-			}
-			x0 := r0.Args[0]
-			if x0.Op != OpAMD64MOVWloadidx1 {
-				continue
-			}
-			i0 := x0.AuxInt
-			s := x0.Aux
-			mem := x0.Args[2]
-			x0_0 := x0.Args[0]
-			x0_1 := x0.Args[1]
-			for _i1 := 0; _i1 <= 1; _i1, x0_0, x0_1 = _i1+1, x0_1, x0_0 {
-				p := x0_0
-				idx := x0_1
-				or := v_1
-				if or.Op != OpAMD64ORQ {
-					continue
-				}
-				_ = or.Args[1]
-				or_0 := or.Args[0]
-				or_1 := or.Args[1]
-				for _i2 := 0; _i2 <= 1; _i2, or_0, or_1 = _i2+1, or_1, or_0 {
-					s1 := or_0
-					if s1.Op != OpAMD64SHLQconst {
-						continue
-					}
-					j1 := s1.AuxInt
-					r1 := s1.Args[0]
-					if r1.Op != OpAMD64ROLWconst || r1.AuxInt != 8 {
-						continue
-					}
-					x1 := r1.Args[0]
-					if x1.Op != OpAMD64MOVWloadidx1 {
-						continue
-					}
-					i1 := x1.AuxInt
-					if x1.Aux != s {
-						continue
-					}
-					_ = x1.Args[2]
-					x1_0 := x1.Args[0]
-					x1_1 := x1.Args[1]
-					for _i3 := 0; _i3 <= 1; _i3, x1_0, x1_1 = _i3+1, x1_1, x1_0 {
-						if p != x1_0 || idx != x1_1 || mem != x1.Args[2] {
-							continue
-						}
-						y := or_1
-						if !(i1 == i0+2 && j1 == j0-16 && j1%32 == 0 && x0.Uses == 1 && x1.Uses == 1 && r0.Uses == 1 && r1.Uses == 1 && s0.Uses == 1 && s1.Uses == 1 && or.Uses == 1 && mergePoint(b, x0, x1, y) != nil && clobber(x0, x1, r0, r1, s0, s1, or)) {
-							continue
-						}
-						b = mergePoint(b, x0, x1, y)
-						v0 := b.NewValue0(v.Pos, OpAMD64ORQ, v.Type)
-						v.copyOf(v0)
-						v1 := b.NewValue0(v.Pos, OpAMD64SHLQconst, v.Type)
-						v1.AuxInt = j1
-						v2 := b.NewValue0(v.Pos, OpAMD64BSWAPL, typ.UInt32)
-						v3 := b.NewValue0(v.Pos, OpAMD64MOVLloadidx1, typ.UInt32)
-						v3.AuxInt = i0
-						v3.Aux = s
-						v3.AddArg3(p, idx, mem)
-						v2.AddArg(v3)
-						v1.AddArg(v2)
-						v0.AddArg2(v1, y)
-						return true
-					}
-				}
-			}
-		}
-		break
-	}
 	// match: (ORQ x l:(MOVQload [off] {sym} ptr mem))
 	// cond: canMergeLoadClobber(v, l, x) && clobber(l)
 	// result: (ORQload x [off] {sym} ptr mem)
diff --git a/test/codegen/memops.go b/test/codegen/memops.go
index dcf5863666..9d18153a29 100644
--- a/test/codegen/memops.go
+++ b/test/codegen/memops.go
@@ -93,3 +93,91 @@ func compMem3(x, y *int) (int, bool) {
 	// 386:`CMPL\t\(`
 	return r, r < *y
 }
+
+// The following functions test that indexed load/store operations get generated.
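+//
+// Each check below expects a single instruction whose memory operand has the
+// indexed form offset(base)(index*scale). Where a pattern admits two scales,
+// e.g. \*[12], either encoding is acceptable: a constant multiplier such as
+// 16*i may be folded into the index register by a shift, so the remaining
+// scale can come out as either value.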
+
+func idxInt8(x, y []int8, i int) {
+	var t int8
+	// amd64: `MOVBL[SZ]X\t1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\), [A-Z]+[0-9]*`
+	t = x[i+1]
+	// amd64: `MOVB\t[A-Z]+[0-9]*, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	y[i+1] = t
+	// amd64: `MOVB\t[$]77, 1\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*1\)`
+	x[i+1] = 77
+}
+
+func idxInt16(x, y []int16, i int) {
+	var t int16
+	// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\), [A-Z]+[0-9]*`
+	t = x[i+1]
+	// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+	y[i+1] = t
+	// amd64: `MOVWL[SZ]X\t2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\), [A-Z]+[0-9]*`
+	t = x[16*i+1]
+	// amd64: `MOVW\t[A-Z]+[0-9]*, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+	y[16*i+1] = t
+	// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*2\)`
+	x[i+1] = 77
+	// amd64: `MOVW\t[$]77, 2\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[12]\)`
+	x[16*i+1] = 77
+}
+
+func idxInt32(x, y []int32, i int) {
+	var t int32
+	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
+	t = x[i+1]
+	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	y[i+1] = t
+	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	t = x[2*i+1]
+	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	y[2*i+1] = t
+	// amd64: `MOVL\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), [A-Z]+[0-9]*`
+	t = x[16*i+1]
+	// amd64: `MOVL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	y[16*i+1] = t
+	// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	x[i+1] = 77
+	// amd64: `MOVL\t[$]77, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	x[16*i+1] = 77
+}
+
+func idxInt64(x, y []int64, i int) {
+	var t int64
+	// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
+	t = x[i+1]
+	// amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	y[i+1] = t
+	// amd64: `MOVQ\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), [A-Z]+[0-9]*`
+	t = x[16*i+1]
+	// amd64: `MOVQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+	y[16*i+1] = t
+	// amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	x[i+1] = 77
+	// amd64: `MOVQ\t[$]77, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+	x[16*i+1] = 77
+}
+
+func idxFloat32(x, y []float32, i int) {
+	var t float32
+	// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+	t = x[i+1]
+	// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+	y[i+1] = t
+	// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
+	t = x[16*i+1]
+	// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\)`
+	y[16*i+1] = t
+}
+
+func idxFloat64(x, y []float64, i int) {
+	var t float64
+	// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+	t = x[i+1]
+	// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+	y[i+1] = t
+	// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
+	t = x[16*i+1]
+	// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\)`
+	y[16*i+1] = t
+}
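
For readers tracing the ORQ rewrites above: those rules (together with their ORL counterparts earlier in the file) are what let explicit byte-at-a-time loads in Go source collapse into a single wide load, with a byte swap for the big-endian order. The sketch below is a rough illustration only, not one of this CL's tests, and the function names are invented; on amd64, load32le is expected to compile to one MOVL, and load32be to a MOVL followed by BSWAPL.

package main

import "fmt"

// load32le assembles a little-endian uint32 from four adjacent byte loads;
// the MOVBload merge rules are expected to combine them into one 32-bit load.
func load32le(b []byte, i int) uint32 {
	return uint32(b[i]) | uint32(b[i+1])<<8 | uint32(b[i+2])<<16 | uint32(b[i+3])<<24
}

// load32be is the big-endian variant; the BSWAPL patterns are expected to
// rewrite it into a single wide load followed by a byte swap.
func load32be(b []byte, i int) uint32 {
	return uint32(b[i])<<24 | uint32(b[i+1])<<16 | uint32(b[i+2])<<8 | uint32(b[i+3])
}

func main() {
	buf := []byte{1, 2, 3, 4}
	fmt.Printf("%#x %#x\n", load32le(buf, 0), load32be(buf, 0)) // 0x4030201 0x1020304
}

Compiling with `go build -gcflags=-S` and inspecting the assembly for the two functions is a quick way to see whether the merge fired on a given toolchain.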