// license that can be found in the LICENSE file.
(Add(Ptr|64|32|16|8) ...) => (ADD ...)
-(Add(32F|64F) ...) => (FADD(S|D) ...)
+(Add(32|64)F ...) => (FADD(S|D) ...)
(Sub(Ptr|64|32|16|8) ...) => (SUB ...)
-(Sub(32F|64F) ...) => (FSUB(S|D) ...)
+(Sub(32|64)F ...) => (FSUB(S|D) ...)
(Mul64 ...) => (MUL ...)
(Mul(32|16|8) ...) => (MULW ...)
-(Mul(32F|64F) ...) => (FMUL(S|D) ...)
+(Mul(32|64)F ...) => (FMUL(S|D) ...)
(Hmul64 ...) => (MULH ...)
(Hmul64u ...) => (UMULH ...)
(Hmul32 x y) => (SRAconst (MULL <typ.Int64> x y) [32])
(Hmul32u x y) => (SRAconst (UMULL <typ.UInt64> x y) [32])
(Select0 (Mul64uhilo x y)) => (UMULH x y)
(Select1 (Mul64uhilo x y)) => (MUL x y)
(Div64 [false] x y) => (DIV x y)
-(Div64u ...) => (UDIV ...)
(Div32 [false] x y) => (DIVW x y)
-(Div32u ...) => (UDIVW ...)
(Div16 [false] x y) => (DIVW (SignExt16to32 x) (SignExt16to32 y))
(Div16u x y) => (UDIVW (ZeroExt16to32 x) (ZeroExt16to32 y))
(Div8 x y) => (DIVW (SignExt8to32 x) (SignExt8to32 y))
(Div8u x y) => (UDIVW (ZeroExt8to32 x) (ZeroExt8to32 y))
+(Div64u ...) => (UDIV ...)
+(Div32u ...) => (UDIVW ...)
(Div32F ...) => (FDIVS ...)
(Div64F ...) => (FDIVD ...)
(Mod64 x y) => (MOD x y)
-(Mod64u ...) => (UMOD ...)
(Mod32 x y) => (MODW x y)
+(Mod64u ...) => (UMOD ...)
(Mod32u ...) => (UMODW ...)
-(Mod16 x y) => (MODW (SignExt16to32 x) (SignExt16to32 y))
-(Mod16u x y) => (UMODW (ZeroExt16to32 x) (ZeroExt16to32 y))
-(Mod8 x y) => (MODW (SignExt8to32 x) (SignExt8to32 y))
-(Mod8u x y) => (UMODW (ZeroExt8to32 x) (ZeroExt8to32 y))
+(Mod(16|8) x y) => (MODW (SignExt(16|8)to32 x) (SignExt(16|8)to32 y))
+(Mod(16|8)u x y) => (UMODW (ZeroExt(16|8)to32 x) (ZeroExt(16|8)to32 y))
// (x + y) / 2 with x>=y => (x - y) / 2 + y
(Avg64u <t> x y) => (ADD (SRLconst <t> (SUB <t> x y) [1]) y)
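// Aside (editor's illustration, not a rewrite rule): the halving identity
// above can be checked in Go; a minimal sketch, assuming x >= y so the
// subtraction cannot wrap:
//
//	package main
//
//	import "fmt"
//
//	// avg mirrors the Avg64u lowering: (x-y)/2 + y avoids the overflow
//	// that a direct (x+y)/2 would hit.
//	func avg(x, y uint64) uint64 { return (x-y)>>1 + y }
//
//	func main() {
//		x, y := uint64(1<<63+5), uint64(1<<63+1)
//		fmt.Println(avg(x, y)) // 9223372036854775811, even though x+y wraps
//	}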
(And(64|32|16|8) ...) => (AND ...)
(Or(64|32|16|8) ...) => (OR ...)
(Xor(64|32|16|8) ...) => (XOR ...)
// unary ops
(Neg(64|32|16|8) ...) => (NEG ...)
-(Neg(32F|64F) ...) => (FNEG(S|D) ...)
+(Neg(32|64)F ...) => (FNEG(S|D) ...)
(Com(64|32|16|8) ...) => (MVN ...)
// math package intrinsics
(Abs ...) => (FABSD ...)
(Sqrt ...) => (FSQRTD ...)
(Ceil ...) => (FRINTPD ...)
(Floor ...) => (FRINTMD ...)
(Round ...) => (FRINTAD ...)
(RoundToEven ...) => (FRINTND ...)
(Trunc ...) => (FRINTZD ...)
(FMA x y z) => (FMADDD z x y)
(Sqrt32 ...) => (FSQRTS ...)
// lowering rotates
// we do rotate detection in generic rules; if the following rules need to be changed, check generic rules first.
(RotateLeft8 <t> x (MOVDconst [c])) => (Or8 (Lsh8x64 <t> x (MOVDconst [c&7])) (Rsh8Ux64 <t> x (MOVDconst [-c&7])))
(RotateLeft8 <t> x y) => (OR <t> (SLL <t> x (ANDconst <typ.Int64> [7] y)) (SRL <t> (ZeroExt8to64 x) (ANDconst <typ.Int64> [7] (NEG <typ.Int64> y))))
(RotateLeft16 <t> x (MOVDconst [c])) => (Or16 (Lsh16x64 <t> x (MOVDconst [c&15])) (Rsh16Ux64 <t> x (MOVDconst [-c&15])))
(RotateLeft16 <t> x y) => (RORW <t> (ORshiftLL <typ.UInt32> (ZeroExt16to32 x) (ZeroExt16to32 x) [16]) (NEG <typ.Int64> y))
(RotateLeft32 x y) => (RORW x (NEG <y.Type> y))
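// Aside (editor's illustration): the constant-rotate rule above encodes the
// usual OR-of-shifts identity; a sketch in Go of the same computation:
//
//	package main
//
//	import "fmt"
//
//	// rotl8 rotates x left by c bits: the low 3 bits of c select the
//	// left shift, and -c&7 supplies the complementary right shift.
//	func rotl8(x uint8, c uint) uint8 { return x<<(c&7) | x>>(-c&7) }
//
//	func main() {
//		fmt.Printf("%08b\n", rotl8(0b10010001, 3)) // 10001100
//	}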
(Ctz(64|32|16|8)NonZero ...) => (Ctz(64|32|32|32) ...)
(Ctz64 <t> x) => (CLZ (RBIT <t> x))
(Ctz32 <t> x) => (CLZW (RBITW <t> x))
(Ctz16 <t> x) => (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x10000] x)))
(Ctz8 <t> x) => (CLZW <t> (RBITW <typ.UInt32> (ORconst <typ.UInt32> [0x100] x)))
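// Aside (editor's illustration): the ORconst in the Ctz16/Ctz8 rules plants a
// set bit just above the operand so the count saturates at the operand width
// even for a zero input; a sketch of the 16-bit case:
//
//	package main
//
//	import (
//		"fmt"
//		"math/bits"
//	)
//
//	func ctz16(x uint16) int {
//		// Bit 16 guarantees a nonzero 32-bit value, so ctz16(0) == 16.
//		return bits.TrailingZeros32(uint32(x) | 0x10000)
//	}
//
//	func main() {
//		fmt.Println(ctz16(0), ctz16(0b1000)) // 16 3
//	}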
(PopCount64 <t> x) => (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> x))))
(PopCount32 <t> x) => (FMOVDfpgp <t> (VUADDLV <typ.Float64> (VCNT <typ.Float64> (FMOVDgpfp <typ.Float64> (ZeroExt32to64 x)))))
(FMOVDfpgp <t> (Arg [off] {sym})) => @b.Func.Entry (Arg <t> [off] {sym})
// Similarly for stores, if we see a store after FPR <=> GPR move, then redirect store to use the other register set.
(MOVDstore [off] {sym} ptr (FMOVDfpgp val) mem) => (FMOVDstore [off] {sym} ptr val mem)
(FMOVDstore [off] {sym} ptr (FMOVDgpfp val) mem) => (MOVDstore [off] {sym} ptr val mem)
(MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem) => (FMOVSstore [off] {sym} ptr val mem)
(FMOVSstore [off] {sym} ptr (FMOVSgpfp val) mem) => (MOVWstore [off] {sym} ptr val mem)
// float <=> int register moves, with no conversion.
// These come up when compiling math.{Float64bits, Float64frombits, Float32bits, Float32frombits}.
(MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr val _)) => (FMOVDfpgp val)
(FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr val _)) => (FMOVDgpfp val)
(MOVWUload [off] {sym} ptr (FMOVSstore [off] {sym} ptr val _)) => (FMOVSfpgp val)
(FMOVSload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _)) => (FMOVSgpfp val)
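// Aside (editor's illustration): math.Float64bits is exactly such a
// no-conversion move; with the load/store pairings above it should lower to a
// single FMOVD register-to-register move instead of a store/load round trip:
//
//	package main
//
//	import (
//		"fmt"
//		"math"
//	)
//
//	func main() {
//		fmt.Printf("%#x\n", math.Float64bits(1.0)) // 0x3ff0000000000000
//	}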
(BitLen64 x) => (SUB (MOVDconst [64]) (CLZ <typ.Int> x))
(BitLen32 x) => (SUB (MOVDconst [32]) (CLZW <typ.Int> x))
(BitRev64 ...) => (RBIT ...)
(BitRev32 ...) => (RBITW ...)
(BitRev16 x) => (SRLconst [48] (RBIT <typ.UInt64> x))
(BitRev8 x) => (SRLconst [56] (RBIT <typ.UInt64> x))
// In fact, UMOD will be translated into the UREM instruction, and UREM is in
// turn expanded into UDIV and MSUB instructions. But if there is already an
// identical UDIV instruction just before or after UREM (a case like
// quo, rem := z/y, z%y), then the second UDIV instruction becomes redundant.
// The purpose of this rule is to have this extra UDIV removed by the CSE pass.
(UMOD <typ.UInt64> x y) => (MSUB <typ.UInt64> x y (UDIV <typ.UInt64> x y))
(UMODW <typ.UInt32> x y) => (MSUBW <typ.UInt32> x y (UDIVW <typ.UInt32> x y))
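// Aside (editor's illustration): the shape this rule targets, where the UDIV
// feeding the MSUB is identical to the explicit division and can be shared:
//
//	package main
//
//	import "fmt"
//
//	func main() {
//		z, y := uint64(17), uint64(5)
//		quo, rem := z/y, z%y  // one UDIV suffices after CSE
//		fmt.Println(quo, rem) // 3 2
//	}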
// 64-bit addition with carry.
// boolean ops -- booleans are represented with 0=false, 1=true
(AndB ...) => (AND ...)
(OrB ...) => (OR ...)
(EqB x y) => (XOR (MOVDconst [1]) (XOR <typ.Bool> x y))
(NeqB ...) => (XOR ...)
(Not x) => (XOR (MOVDconst [1]) x)
// shifts
// hardware instruction uses only the low 6 bits of the shift
(Lsh(64|32|16|8)x64 <t> x y) && shiftIsBounded(v) => (SLL <t> x y)
(Lsh(64|32|16|8)x32 <t> x y) && shiftIsBounded(v) => (SLL <t> x y)
(Lsh(64|32|16|8)x16 <t> x y) && shiftIsBounded(v) => (SLL <t> x y)
(Lsh(64|32|16|8)x8 <t> x y) && shiftIsBounded(v) => (SLL <t> x y)
// signed right shift
(Rsh64x(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRA <t> x y)
// unsigned right shift
(Rsh8Ux(64|32|16|8) <t> x y) && shiftIsBounded(v) => (SRL <t> (ZeroExt8to64 x) y)
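// Aside (editor's illustration): Go defines an oversized shift to yield 0,
// while SLL/SRL only consume the low 6 bits of the count, which is why the
// bare-shift rules above require shiftIsBounded:
//
//	package main
//
//	import "fmt"
//
//	func main() {
//		x, s := uint64(1), uint(70)
//		fmt.Println(x << s)        // 0: Go semantics for s >= 64
//		fmt.Println(x << (s % 64)) // 64: masked count, provably bounded
//	}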
// shift value may be out of range, use CMP + CSEL instead
-(Lsh64x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Lsh64x32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Lsh64x16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Lsh64x8 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
-
-(Lsh32x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Lsh32x32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Lsh32x16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Lsh32x8 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
-
-(Lsh16x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Lsh16x32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Lsh16x16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Lsh16x8 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
-
-(Lsh8x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Lsh8x32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Lsh8x16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Lsh8x8 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
-
-(Rsh64Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
-(Rsh64Ux32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Rsh64Ux16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Rsh64Ux8 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
-
-(Rsh32Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
-(Rsh32Ux32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Rsh32Ux16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Rsh32Ux8 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
-
-(Rsh16Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
-(Rsh16Ux32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Rsh16Ux16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Rsh16Ux8 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
-
-(Rsh8Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
-(Rsh8Ux32 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt32to64 y)))
-(Rsh8Ux16 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt16to64 y)))
-(Rsh8Ux8 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] (ZeroExt8to64 y)))
-
-(Rsh64x64 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
-(Rsh64x32 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
-(Rsh64x16 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
-(Rsh64x8 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
-
-(Rsh32x64 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
-(Rsh32x32 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
-(Rsh32x16 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
-(Rsh32x8 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
-
-(Rsh16x64 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
-(Rsh16x32 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
-(Rsh16x16 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
-(Rsh16x8 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
-
-(Rsh8x64 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
-(Rsh8x32 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt32to64 y))))
-(Rsh8x16 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt16to64 y))))
-(Rsh8x8 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] (ZeroExt8to64 y))))
+(Lsh64x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Lsh64x(32|16|8) <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))
+
+(Lsh32x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Lsh32x(32|16|8) <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))
+
+(Lsh16x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Lsh16x(32|16|8) <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))
+
+(Lsh8x64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Lsh8x(32|16|8) <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SLL <t> x y) (Const64 <t> [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))
+
+(Rsh64Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] y))
+(Rsh64Ux(32|16|8) <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> x y) (Const64 <t> [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))
+
+(Rsh32Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
+(Rsh32Ux(32|16|8) <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt32to64 x) y) (Const64 <t> [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))
+
+(Rsh16Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
+(Rsh16Ux(32|16|8) <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt16to64 x) y) (Const64 <t> [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))
+
+(Rsh8Ux64 <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] y))
+(Rsh8Ux(32|16|8) <t> x y) && !shiftIsBounded(v) => (CSEL [OpARM64LessThanU] (SRL <t> (ZeroExt8to64 x) y) (Const64 <t> [0]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y)))
+
+(Rsh64x64 x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
+(Rsh64x(32|16|8) x y) && !shiftIsBounded(v) => (SRA x (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))))
+
+(Rsh32x64 x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
+(Rsh32x(32|16|8) x y) && !shiftIsBounded(v) => (SRA (SignExt32to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))))
+
+(Rsh16x64 x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
+(Rsh16x(32|16|8) x y) && !shiftIsBounded(v) => (SRA (SignExt16to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))))
+
+(Rsh8x64 x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] y)))
+(Rsh8x(32|16|8) x y) && !shiftIsBounded(v) => (SRA (SignExt8to64 x) (CSEL [OpARM64LessThanU] <y.Type> y (Const64 <y.Type> [63]) (CMPconst [64] ((ZeroExt32to64|ZeroExt16to64|ZeroExt8to64) y))))
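// Aside (editor's illustration): spelled out in Go, the CSEL form above
// selects between the hardware shift and zero based on an unsigned compare of
// the count with 64:
//
//	package main
//
//	import "fmt"
//
//	// lsh64 is the value the Lsh64x64 CSEL rule computes.
//	func lsh64(x, y uint64) uint64 {
//		if y < 64 { // CMPconst [64] y, LessThanU
//			return x << y // SLL
//		}
//		return 0 // Const64 [0]
//	}
//
//	func main() {
//		fmt.Println(lsh64(1, 3), lsh64(1, 64)) // 8 0
//	}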
// constants
(Const(64|32|16|8) [val]) => (MOVDconst [int64(val)])
-(Const(32F|64F) [val]) => (FMOV(S|D)const [float64(val)])
+(Const(32|64)F [val]) => (FMOV(S|D)const [float64(val)])
(ConstNil) => (MOVDconst [0])
(ConstBool [t]) => (MOVDconst [b2i(t)])
// truncations
// Because we ignore high parts of registers, truncates are just copies.
(Trunc16to8 ...) => (Copy ...)
(Trunc32to8 ...) => (Copy ...)
(Trunc32to16 ...) => (Copy ...)
(Trunc64to8 ...) => (Copy ...)
(Trunc64to16 ...) => (Copy ...)
(Trunc64to32 ...) => (Copy ...)
// Zero-/Sign-extensions
(ZeroExt8to16 ...) => (MOVBUreg ...)
(ZeroExt8to32 ...) => (MOVBUreg ...)
(ZeroExt16to32 ...) => (MOVHUreg ...)
(ZeroExt8to64 ...) => (MOVBUreg ...)
(ZeroExt16to64 ...) => (MOVHUreg ...)
(ZeroExt32to64 ...) => (MOVWUreg ...)
(SignExt8to16 ...) => (MOVBreg ...)
(SignExt8to32 ...) => (MOVBreg ...)
(SignExt16to32 ...) => (MOVHreg ...)
(SignExt8to64 ...) => (MOVBreg ...)
(SignExt16to64 ...) => (MOVHreg ...)
(SignExt32to64 ...) => (MOVWreg ...)
// float <=> int conversion
(Cvt32to32F ...) => (SCVTFWS ...)
(Cvt32to64F ...) => (SCVTFWD ...)
(Cvt64to32F ...) => (SCVTFS ...)
(Cvt64to64F ...) => (SCVTFD ...)
(Cvt32Uto32F ...) => (UCVTFWS ...)
(Cvt32Uto64F ...) => (UCVTFWD ...)
(Cvt64Uto32F ...) => (UCVTFS ...)
(Cvt64Uto64F ...) => (UCVTFD ...)
(Cvt32Fto32 ...) => (FCVTZSSW ...)
(Cvt64Fto32 ...) => (FCVTZSDW ...)
(Cvt32Fto64 ...) => (FCVTZSS ...)
(Cvt64Fto64 ...) => (FCVTZSD ...)
(Cvt32Fto32U ...) => (FCVTZUSW ...)
(Cvt64Fto32U ...) => (FCVTZUDW ...)
(Cvt32Fto64U ...) => (FCVTZUS ...)
(Round64F ...) => (LoweredRound64F ...)
// comparisons
(Eq8 x y) => (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
(Eq16 x y) => (Equal (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
(Eq32 x y) => (Equal (CMPW x y))
(Eq64 x y) => (Equal (CMP x y))
(EqPtr x y) => (Equal (CMP x y))
(Eq32F x y) => (Equal (FCMPS x y))
(Eq64F x y) => (Equal (FCMPD x y))
(Neq8 x y) => (NotEqual (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
(Neq16 x y) => (NotEqual (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
(Neq32 x y) => (NotEqual (CMPW x y))
(Neq64 x y) => (NotEqual (CMP x y))
(NeqPtr x y) => (NotEqual (CMP x y))
-(Neq32F x y) => (NotEqual (FCMPS x y))
-(Neq64F x y) => (NotEqual (FCMPD x y))
+(Neq(32|64)F x y) => (NotEqual (FCMP(S|D) x y))
-(Less8 x y) => (LessThan (CMPW (SignExt8to32 x) (SignExt8to32 y)))
-(Less16 x y) => (LessThan (CMPW (SignExt16to32 x) (SignExt16to32 y)))
+(Less(8|16) x y) => (LessThan (CMPW (SignExt(8|16)to32 x) (SignExt(8|16)to32 y)))
(Less32 x y) => (LessThan (CMPW x y))
(Less64 x y) => (LessThan (CMP x y))
// Set condition flags for floating-point comparisons "x < y"
// and "x <= y". Because if either or both of the operands are
// NaN, all of the ordered comparisons are false.
// For an unsigned integer x:
// 0 <  x => x != 0
// x <= 0 => x == 0
// x <  1 => x == 0
// 1 <= x => x != 0
(Less(8U|16U|32U|64U) zero:(MOVDconst [0]) x) => (Neq(8|16|32|64) zero x)
(Leq(8U|16U|32U|64U) x zero:(MOVDconst [0])) => (Eq(8|16|32|64) x zero)
(Less(8U|16U|32U|64U) x (MOVDconst [1])) => (Eq(8|16|32|64) x (MOVDconst [0]))
(Leq(8U|16U|32U|64U) (MOVDconst [1]) x) => (Neq(8|16|32|64) (MOVDconst [0]) x)
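// Aside (editor's illustration): the four identities hold for any unsigned x,
// which is what justifies rewriting these compares into equality tests:
//
//	package main
//
//	import "fmt"
//
//	func main() {
//		for _, x := range []uint32{0, 1, 7} {
//			fmt.Println((x < 1) == (x == 0), (1 <= x) == (x != 0))
//		}
//	}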
(Less8U x y) => (LessThanU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
(Less16U x y) => (LessThanU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
(Less32U x y) => (LessThanU (CMPW x y))
(Less64U x y) => (LessThanU (CMP x y))
(Leq8 x y) => (LessEqual (CMPW (SignExt8to32 x) (SignExt8to32 y)))
(Leq16 x y) => (LessEqual (CMPW (SignExt16to32 x) (SignExt16to32 y)))
(Leq32 x y) => (LessEqual (CMPW x y))
(Leq64 x y) => (LessEqual (CMP x y))
(Leq32F x y) => (LessEqualF (FCMPS x y))
(Leq64F x y) => (LessEqualF (FCMPD x y))
(Leq8U x y) => (LessEqualU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
(Leq16U x y) => (LessEqualU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y)))
(Leq32U x y) => (LessEqualU (CMPW x y))
(Leq64U x y) => (LessEqualU (CMP x y))
(Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (FMOVDstore ptr val mem)
// zeroing
(Zero [0] _ mem) => mem
(Zero [1] ptr mem) => (MOVBstore ptr (MOVDconst [0]) mem)
(Zero [2] ptr mem) => (MOVHstore ptr (MOVDconst [0]) mem)
(Zero [4] ptr mem) => (MOVWstore ptr (MOVDconst [0]) mem)
// moves
(Move [0] _ _ mem) => mem
(Move [1] dst src mem) => (MOVBstore dst (MOVBUload src mem) mem)
(Move [2] dst src mem) => (MOVHstore dst (MOVHUload src mem) mem)
(Move [3] dst src mem) =>
	(MOVBstore [2] dst (MOVBUload [2] src mem)
		(MOVHstore dst (MOVHUload src mem) mem))
// calls
(StaticCall ...) => (CALLstatic ...)
(ClosureCall ...) => (CALLclosure ...)
(InterCall ...) => (CALLinter ...)
(TailCall ...) => (CALLtail ...)
// checks
(NilCheck ...) => (LoweredNilCheck ...)
(IsNonNil ptr) => (NotEqual (CMPconst [0] ptr))
(IsInBounds idx len) => (LessThanU (CMP idx len))
(IsSliceInBounds idx len) => (LessEqualU (CMP idx len))
// pseudo-ops
(GetClosurePtr ...) => (LoweredGetClosurePtr ...)
(GetCallerSP ...) => (LoweredGetCallerSP ...)
(GetCallerPC ...) => (LoweredGetCallerPC ...)
// Absorb pseudo-ops into blocks.
(If (Equal cc) yes no) => (EQ cc yes no)
(If (NotEqual cc) yes no) => (NE cc yes no)
(If (LessThan cc) yes no) => (LT cc yes no)
(If (LessThanU cc) yes no) => (ULT cc yes no)
(If (LessEqual cc) yes no) => (LE cc yes no)
(If (LessEqualU cc) yes no) => (ULE cc yes no)
(If (GreaterThan cc) yes no) => (GT cc yes no)
(If (GreaterThanU cc) yes no) => (UGT cc yes no)
(If (GreaterEqual cc) yes no) => (GE cc yes no)
(If (GreaterEqualU cc) yes no) => (UGE cc yes no)
(If (LessThanF cc) yes no) => (FLT cc yes no)
(If (LessEqualF cc) yes no) => (FLE cc yes no)
(If (GreaterThanF cc) yes no) => (FGT cc yes no)
(If (GreaterEqualF cc) yes no) => (FGE cc yes no)
(If cond yes no) => (TBNZ [0] cond yes no)
(AtomicStorePtrNoWB ...) => (STLR ...)
(AtomicExchange(32|64) ...) => (LoweredAtomicExchange(32|64) ...)
(AtomicAdd(32|64) ...) => (LoweredAtomicAdd(32|64) ...)
(AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...)
(AtomicAdd(32|64)Variant ...) => (LoweredAtomicAdd(32|64)Variant ...)
(AtomicExchange(32|64)Variant ...) => (LoweredAtomicExchange(32|64)Variant ...)
(AtomicCompareAndSwap(32|64)Variant ...) => (LoweredAtomicCas(32|64)Variant ...)
// Currently the updated value is not used, but we need a register to temporarily hold it.
-(AtomicAnd8 ptr val mem) => (Select1 (LoweredAtomicAnd8 ptr val mem))
-(AtomicAnd32 ptr val mem) => (Select1 (LoweredAtomicAnd32 ptr val mem))
-(AtomicOr8 ptr val mem) => (Select1 (LoweredAtomicOr8 ptr val mem))
-(AtomicOr32 ptr val mem) => (Select1 (LoweredAtomicOr32 ptr val mem))
-
-(AtomicAnd8Variant ptr val mem) => (Select1 (LoweredAtomicAnd8Variant ptr val mem))
-(AtomicAnd32Variant ptr val mem) => (Select1 (LoweredAtomicAnd32Variant ptr val mem))
-(AtomicOr8Variant ptr val mem) => (Select1 (LoweredAtomicOr8Variant ptr val mem))
-(AtomicOr32Variant ptr val mem) => (Select1 (LoweredAtomicOr32Variant ptr val mem))
+(AtomicAnd(8|32) ptr val mem) => (Select1 (LoweredAtomicAnd(8|32) ptr val mem))
+(AtomicOr(8|32) ptr val mem) => (Select1 (LoweredAtomicOr(8|32) ptr val mem))
+(AtomicAnd(8|32)Variant ptr val mem) => (Select1 (LoweredAtomicAnd(8|32)Variant ptr val mem))
+(AtomicOr(8|32)Variant ptr val mem) => (Select1 (LoweredAtomicOr(8|32)Variant ptr val mem))
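// Aside (editor's illustration): the lowered atomic AND/OR ops produce a
// (value, memory) tuple; Select1 keeps only the memory effect since no caller
// consumes the value yet. Assuming a Go 1.23+ toolchain, sync/atomic exposes
// one such front end:
//
//	package main
//
//	import (
//		"fmt"
//		"sync/atomic"
//	)
//
//	func main() {
//		var x atomic.Uint32
//		x.Store(0b1111)
//		x.And(0b0110) // returned old value ignored, as in the rules
//		fmt.Printf("%04b\n", x.Load()) // 0110
//	}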
// Write barrier.
(WB ...) => (LoweredWB ...)
// Optimizations
// Absorb boolean tests into block
(NZ (Equal cc) yes no) => (EQ cc yes no)
(NZ (NotEqual cc) yes no) => (NE cc yes no)
(NZ (LessThan cc) yes no) => (LT cc yes no)
(NZ (LessThanU cc) yes no) => (ULT cc yes no)
(NZ (LessEqual cc) yes no) => (LE cc yes no)
(NZ (LessEqualU cc) yes no) => (ULE cc yes no)
(NZ (GreaterThan cc) yes no) => (GT cc yes no)
(NZ (GreaterThanU cc) yes no) => (UGT cc yes no)
(NZ (GreaterEqual cc) yes no) => (GE cc yes no)
(NZ (GreaterEqualU cc) yes no) => (UGE cc yes no)
(NZ (LessThanF cc) yes no) => (FLT cc yes no)
(NZ (LessEqualF cc) yes no) => (FLE cc yes no)
(NZ (GreaterThanF cc) yes no) => (FGT cc yes no)
(NZ (GreaterEqualF cc) yes no) => (FGE cc yes no)
(TBNZ [0] (Equal cc) yes no) => (EQ cc yes no)
(TBNZ [0] (NotEqual cc) yes no) => (NE cc yes no)
(TBNZ [0] (LessThan cc) yes no) => (LT cc yes no)
(TBNZ [0] (LessThanU cc) yes no) => (ULT cc yes no)
(TBNZ [0] (LessEqual cc) yes no) => (LE cc yes no)
(TBNZ [0] (LessEqualU cc) yes no) => (ULE cc yes no)
(TBNZ [0] (GreaterThan cc) yes no) => (GT cc yes no)
(TBNZ [0] (GreaterThanU cc) yes no) => (UGT cc yes no)
(TBNZ [0] (GreaterEqual cc) yes no) => (GE cc yes no)
(TBNZ [0] (GreaterEqualU cc) yes no) => (UGE cc yes no)
(TBNZ [0] (LessThanF cc) yes no) => (FLT cc yes no)
(TBNZ [0] (LessEqualF cc) yes no) => (FLE cc yes no)
(TBNZ [0] (GreaterThanF cc) yes no) => (FGT cc yes no)
(TBNZ [0] (GreaterEqualF cc) yes no) => (FGE cc yes no)
-(EQ (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (EQ (TST x y) yes no)
-(NE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (NE (TST x y) yes no)
-(LT (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LT (TST x y) yes no)
-(LE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LE (TST x y) yes no)
-(GT (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GT (TST x y) yes no)
-(GE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GE (TST x y) yes no)
-
-(EQ (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (EQ (TSTconst [c] y) yes no)
-(NE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (NE (TSTconst [c] y) yes no)
-(LT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LT (TSTconst [c] y) yes no)
-(LE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LE (TSTconst [c] y) yes no)
-(GT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GT (TSTconst [c] y) yes no)
-(GE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GE (TSTconst [c] y) yes no)
-
-(EQ (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (EQ (TSTW x y) yes no)
-(NE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (NE (TSTW x y) yes no)
-(LT (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LT (TSTW x y) yes no)
-(LE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (LE (TSTW x y) yes no)
-(GT (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GT (TSTW x y) yes no)
-(GE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => (GE (TSTW x y) yes no)
-
-(EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (EQ (TSTWconst [int32(c)] y) yes no)
-(NE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (NE (TSTWconst [int32(c)] y) yes no)
-(LT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LT (TSTWconst [int32(c)] y) yes no)
-(LE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (LE (TSTWconst [int32(c)] y) yes no)
-(GT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GT (TSTWconst [int32(c)] y) yes no)
-(GE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => (GE (TSTWconst [int32(c)] y) yes no)
+((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TST x y) yes no)
+((EQ|NE|LT|LE|GT|GE) (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTconst [c] y) yes no)
+((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTW x y) yes no)
+((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTWconst [int32(c)] y) yes no)
// For conditional instructions such as CSET, CSEL.
-(Equal (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (Equal (TST x y))
-(NotEqual (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (NotEqual (TST x y))
-(LessThan (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (LessThan (TST x y))
-(LessEqual (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (LessEqual (TST x y))
-(GreaterThan (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterThan (TST x y))
-(GreaterEqual (CMPconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterEqual (TST x y))
-
-(Equal (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (Equal (TSTWconst [int32(c)] y))
-(NotEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (NotEqual (TSTWconst [int32(c)] y))
-(LessThan (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessThan (TSTWconst [int32(c)] y))
-(LessEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessEqual (TSTWconst [int32(c)] y))
-(GreaterThan (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterThan (TSTWconst [int32(c)] y))
-(GreaterEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterEqual (TSTWconst [int32(c)] y))
-
-(Equal (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (Equal (TSTW x y))
-(NotEqual (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (NotEqual (TSTW x y))
-(LessThan (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (LessThan (TSTW x y))
-(LessEqual (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (LessEqual (TSTW x y))
-(GreaterThan (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterThan (TSTW x y))
-(GreaterEqual (CMPWconst [0] z:(AND x y))) && z.Uses == 1 => (GreaterEqual (TSTW x y))
-
-(Equal (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (Equal (TSTconst [c] y))
-(NotEqual (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (NotEqual (TSTconst [c] y))
-(LessThan (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessThan (TSTconst [c] y))
-(LessEqual (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessEqual (TSTconst [c] y))
-(GreaterThan (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterThan (TSTconst [c] y))
-(GreaterEqual (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterEqual (TSTconst [c] y))
-
-(EQ (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (EQ (CMNconst [c] y) yes no)
-(NE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (NE (CMNconst [c] y) yes no)
-(LT (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LTnoov (CMNconst [c] y) yes no)
-(LE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LEnoov (CMNconst [c] y) yes no)
-(GT (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GTnoov (CMNconst [c] y) yes no)
-(GE (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GEnoov (CMNconst [c] y) yes no)
-
-(EQ (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (EQ (CMNWconst [int32(c)] y) yes no)
-(NE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (NE (CMNWconst [int32(c)] y) yes no)
-(LT (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LTnoov (CMNWconst [int32(c)] y) yes no)
-(LE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (LEnoov (CMNWconst [int32(c)] y) yes no)
-(GT (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GTnoov (CMNWconst [int32(c)] y) yes no)
-(GE (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => (GEnoov (CMNWconst [int32(c)] y) yes no)
-
-(EQ (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (EQ (CMN x y) yes no)
-(NE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (NE (CMN x y) yes no)
-(LT (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LTnoov (CMN x y) yes no)
-(LE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LEnoov (CMN x y) yes no)
-(GT (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GTnoov (CMN x y) yes no)
-(GE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GEnoov (CMN x y) yes no)
-
-(EQ (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (EQ (CMNW x y) yes no)
-(NE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (NE (CMNW x y) yes no)
-(LT (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LTnoov (CMNW x y) yes no)
-(LE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (LEnoov (CMNW x y) yes no)
-(GT (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GTnoov (CMNW x y) yes no)
-(GE (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => (GEnoov (CMNW x y) yes no)
+((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (CMPconst [0] z:(AND x y))) && z.Uses == 1 =>
+ ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (TST x y))
+((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 =>
+ ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (TSTWconst [int32(c)] y))
+((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (CMPWconst [0] z:(AND x y))) && z.Uses == 1 =>
+ ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (TSTW x y))
+((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 =>
+ ((Equal|NotEqual|LessThan|LessEqual|GreaterThan|GreaterEqual) (TSTconst [c] y))
+
+((EQ|NE|LT|LE|GT|GE) (CMPconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNconst [c] y) yes no)
+((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] x:(ADDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNWconst [int32(c)] y) yes no)
+((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMN x y) yes no)
+((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(ADD x y)) yes no) && z.Uses == 1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNW x y) yes no)
// CMP(x,-y) -> CMN(x,y) is only valid for unordered (EQ/NE) comparisons, because y may be -1<<63 and its negation overflows.
-(EQ (CMP x z:(NEG y)) yes no) && z.Uses == 1 => (EQ (CMN x y) yes no)
-(NE (CMP x z:(NEG y)) yes no) && z.Uses == 1 => (NE (CMN x y) yes no)
-
-(Equal (CMP x z:(NEG y))) && z.Uses == 1 => (Equal (CMN x y))
-(NotEqual (CMP x z:(NEG y))) && z.Uses == 1 => (NotEqual (CMN x y))
+((EQ|NE) (CMP x z:(NEG y)) yes no) && z.Uses == 1 => ((EQ|NE) (CMN x y) yes no)
+((Equal|NotEqual) (CMP x z:(NEG y))) && z.Uses == 1 => ((Equal|NotEqual) (CMN x y))
// CMPW(x,-y) -> CMNW(x,y) is only valid for unordered (EQ/NE) comparisons, because y may be -1<<31 and its negation overflows.
-(EQ (CMPW x z:(NEG y)) yes no) && z.Uses == 1 => (EQ (CMNW x y) yes no)
-(NE (CMPW x z:(NEG y)) yes no) && z.Uses == 1 => (NE (CMNW x y) yes no)
-
-(Equal (CMPW x z:(NEG y))) && z.Uses == 1 => (Equal (CMNW x y))
-(NotEqual (CMPW x z:(NEG y))) && z.Uses == 1 => (NotEqual (CMNW x y))
+((EQ|NE) (CMPW x z:(NEG y)) yes no) && z.Uses == 1 => ((EQ|NE) (CMNW x y) yes no)
+((Equal|NotEqual) (CMPW x z:(NEG y))) && z.Uses == 1 => ((Equal|NotEqual) (CMNW x y))
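// Aside (editor's illustration): the restriction to EQ/NE exists because
// negation can wrap; for the minimum value, -y == y, so only the Z flag
// (equality) remains trustworthy after the rewrite:
//
//	package main
//
//	import "fmt"
//
//	func main() {
//		y := int64(-1 << 63)
//		fmt.Println(-y == y) // true: -(-1<<63) overflows back to -1<<63
//	}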
// For conditional instructions such as CSET, CSEL.
// TODO: add support for LT, LE, GT, GE, overflow needs to be considered.
-(Equal (CMPconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => (Equal (CMNconst [c] y))
-(NotEqual (CMPconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => (NotEqual (CMNconst [c] y))
-
-(Equal (CMPWconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => (Equal (CMNWconst [int32(c)] y))
-(NotEqual (CMPWconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => (NotEqual (CMNWconst [int32(c)] y))
-
-(Equal (CMPconst [0] z:(ADD x y))) && z.Uses == 1 => (Equal (CMN x y))
-(NotEqual (CMPconst [0] z:(ADD x y))) && z.Uses == 1 => (NotEqual (CMN x y))
-
-(Equal (CMPWconst [0] z:(ADD x y))) && z.Uses == 1 => (Equal (CMNW x y))
-(NotEqual (CMPWconst [0] z:(ADD x y))) && z.Uses == 1 => (NotEqual (CMNW x y))
-
-(Equal (CMPconst [0] z:(MADD a x y))) && z.Uses==1 => (Equal (CMN a (MUL <x.Type> x y)))
-(NotEqual (CMPconst [0] z:(MADD a x y))) && z.Uses==1 => (NotEqual (CMN a (MUL <x.Type> x y)))
-
-(Equal (CMPconst [0] z:(MSUB a x y))) && z.Uses==1 => (Equal (CMP a (MUL <x.Type> x y)))
-(NotEqual (CMPconst [0] z:(MSUB a x y))) && z.Uses==1 => (NotEqual (CMP a (MUL <x.Type> x y)))
-
-(Equal (CMPWconst [0] z:(MADDW a x y))) && z.Uses==1 => (Equal (CMNW a (MULW <x.Type> x y)))
-(NotEqual (CMPWconst [0] z:(MADDW a x y))) && z.Uses==1 => (NotEqual (CMNW a (MULW <x.Type> x y)))
-
-(Equal (CMPWconst [0] z:(MSUBW a x y))) && z.Uses==1 => (Equal (CMPW a (MULW <x.Type> x y)))
-(NotEqual (CMPWconst [0] z:(MSUBW a x y))) && z.Uses==1 => (NotEqual (CMPW a (MULW <x.Type> x y)))
-
-(CMPconst [c] y) && c < 0 && c != -1<<63 => (CMNconst [-c] y)
-(CMPWconst [c] y) && c < 0 && c != -1<<31 => (CMNWconst [-c] y)
-(CMNconst [c] y) && c < 0 && c != -1<<63 => (CMPconst [-c] y)
-(CMNWconst [c] y) && c < 0 && c != -1<<31 => (CMPWconst [-c] y)
-
-(EQ (CMPconst [0] x) yes no) => (Z x yes no)
-(NE (CMPconst [0] x) yes no) => (NZ x yes no)
-(EQ (CMPWconst [0] x) yes no) => (ZW x yes no)
-(NE (CMPWconst [0] x) yes no) => (NZW x yes no)
-
-(EQ (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (EQ (CMN a (MUL <x.Type> x y)) yes no)
-(NE (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (NE (CMN a (MUL <x.Type> x y)) yes no)
-(LT (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (LTnoov (CMN a (MUL <x.Type> x y)) yes no)
-(LE (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (LEnoov (CMN a (MUL <x.Type> x y)) yes no)
-(GT (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (GTnoov (CMN a (MUL <x.Type> x y)) yes no)
-(GE (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => (GEnoov (CMN a (MUL <x.Type> x y)) yes no)
-
-(EQ (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => (EQ (CMP a (MUL <x.Type> x y)) yes no)
-(NE (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => (NE (CMP a (MUL <x.Type> x y)) yes no)
-(LE (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => (LEnoov (CMP a (MUL <x.Type> x y)) yes no)
-(LT (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => (LTnoov (CMP a (MUL <x.Type> x y)) yes no)
-(GE (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => (GEnoov (CMP a (MUL <x.Type> x y)) yes no)
-(GT (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => (GTnoov (CMP a (MUL <x.Type> x y)) yes no)
-
-(EQ (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (EQ (CMNW a (MULW <x.Type> x y)) yes no)
-(NE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (NE (CMNW a (MULW <x.Type> x y)) yes no)
-(LE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (LEnoov (CMNW a (MULW <x.Type> x y)) yes no)
-(LT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (LTnoov (CMNW a (MULW <x.Type> x y)) yes no)
-(GE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (GEnoov (CMNW a (MULW <x.Type> x y)) yes no)
-(GT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => (GTnoov (CMNW a (MULW <x.Type> x y)) yes no)
-
-(EQ (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (EQ (CMPW a (MULW <x.Type> x y)) yes no)
-(NE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (NE (CMPW a (MULW <x.Type> x y)) yes no)
-(LE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (LEnoov (CMPW a (MULW <x.Type> x y)) yes no)
-(LT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (LTnoov (CMPW a (MULW <x.Type> x y)) yes no)
-(GE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (GEnoov (CMPW a (MULW <x.Type> x y)) yes no)
-(GT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => (GTnoov (CMPW a (MULW <x.Type> x y)) yes no)
+((Equal|NotEqual) (CMPconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => ((Equal|NotEqual) (CMNconst [c] y))
+((Equal|NotEqual) (CMPWconst [0] x:(ADDconst [c] y))) && x.Uses == 1 => ((Equal|NotEqual) (CMNWconst [int32(c)] y))
+((Equal|NotEqual) (CMPconst [0] z:(ADD x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMN x y))
+((Equal|NotEqual) (CMPWconst [0] z:(ADD x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMNW x y))
+((Equal|NotEqual) (CMPconst [0] z:(MADD a x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMN a (MUL <x.Type> x y)))
+((Equal|NotEqual) (CMPconst [0] z:(MSUB a x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMP a (MUL <x.Type> x y)))
+((Equal|NotEqual) (CMPWconst [0] z:(MADDW a x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMNW a (MULW <x.Type> x y)))
+((Equal|NotEqual) (CMPWconst [0] z:(MSUBW a x y))) && z.Uses == 1 => ((Equal|NotEqual) (CMPW a (MULW <x.Type> x y)))
+
+((CMPconst|CMNconst) [c] y) && c < 0 && c != -1<<63 => ((CMNconst|CMPconst) [-c] y)
+((CMPWconst|CMNWconst) [c] y) && c < 0 && c != -1<<31 => ((CMNWconst|CMPWconst) [-c] y)
+
+((EQ|NE) (CMPconst [0] x) yes no) => ((Z|NZ) x yes no)
+((EQ|NE) (CMPWconst [0] x) yes no) => ((ZW|NZW) x yes no)
+
+((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMN a (MUL <x.Type> x y)) yes no)
+((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMP a (MUL <x.Type> x y)) yes no)
+((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNW a (MULW <x.Type> x y)) yes no)
+((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMPW a (MULW <x.Type> x y)) yes no)
// Absorb bit-tests into block
(Z (ANDconst [c] x) yes no) && oneBit(c) => (TBZ [int64(ntz64(c))] x yes no)
(NZ (ANDconst [c] x) yes no) && oneBit(c) => (TBNZ [int64(ntz64(c))] x yes no)
(ZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBZ [int64(ntz64(int64(uint32(c))))] x yes no)
(NZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no)
(EQ (TSTconst [c] x) yes no) && oneBit(c) => (TBZ [int64(ntz64(c))] x yes no)
(NE (TSTconst [c] x) yes no) && oneBit(c) => (TBNZ [int64(ntz64(c))] x yes no)
(EQ (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBZ [int64(ntz64(int64(uint32(c))))] x yes no)
(NE (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) => (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no)
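// Aside (editor's illustration): the kind of source pattern these bit-test
// absorptions target; the branch on x&8 != 0 can become a single TBNZ [3]:
//
//	package main
//
//	import "fmt"
//
//	func hasBit3(x uint64) bool { return x&8 != 0 }
//
//	func main() {
//		fmt.Println(hasBit3(0b1000), hasBit3(0b0111)) // true false
//	}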
// Test sign-bit for signed comparisons against zero
(GE (CMPWconst [0] x) yes no) => (TBZ [31] x yes no)
(GE (CMPconst [0] x) yes no) => (TBZ [63] x yes no)
(LT (CMPWconst [0] x) yes no) => (TBNZ [31] x yes no)
(LT (CMPconst [0] x) yes no) => (TBNZ [63] x yes no)
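// Aside (editor's illustration): a signed comparison against zero only needs
// the sign bit, so x >= 0 branches can test bit 63 (bit 31 for 32-bit values)
// directly:
//
//	package main
//
//	import "fmt"
//
//	func nonNegative(x int64) bool { return x >= 0 }
//
//	func main() {
//		fmt.Println(nonNegative(5), nonNegative(-5)) // true false
//	}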
// fold offset into address
(ADDconst [off1] (MOVDaddr [off2] {sym} ptr)) && is32Bit(off1+int64(off2)) =>
	(MOVDaddr [int32(off1)+off2] {sym} ptr)
(MOVBload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVBloadidx ptr idx mem)
(FMOVSload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (FMOVSloadidx ptr idx mem)
(FMOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (FMOVDloadidx ptr idx mem)
+
(MOVDloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVDload [int32(c)] ptr mem)
(MOVDloadidx (MOVDconst [c]) ptr mem) && is32Bit(c) => (MOVDload [int32(c)] ptr mem)
(MOVWUloadidx ptr (MOVDconst [c]) mem) && is32Bit(c) => (MOVWUload [int32(c)] ptr mem)
// register indexed store
(MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx ptr idx val mem)
(MOVWstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVWstoreidx ptr idx val mem)
(MOVHstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVHstoreidx ptr idx val mem)
(MOVBstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVBstoreidx ptr idx val mem)
(FMOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (FMOVDstoreidx ptr idx val mem)
(FMOVSstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (FMOVSstoreidx ptr idx val mem)
(MOVDstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVDstore [int32(c)] ptr val mem)
(MOVDstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVDstore [int32(c)] idx val mem)
(MOVWstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVWstore [int32(c)] ptr val mem)
(MOVWstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVWstore [int32(c)] idx val mem)
(MOVHstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVHstore [int32(c)] ptr val mem)
(MOVHstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVHstore [int32(c)] idx val mem)
(MOVBstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (MOVBstore [int32(c)] ptr val mem)
(MOVBstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (MOVBstore [int32(c)] idx val mem)
(FMOVDstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (FMOVDstore [int32(c)] ptr val mem)
(FMOVDstoreidx (MOVDconst [c]) idx val mem) && is32Bit(c) => (FMOVDstore [int32(c)] idx val mem)
(FMOVSstoreidx ptr (MOVDconst [c]) val mem) && is32Bit(c) => (FMOVSstore [int32(c)] ptr val mem)
(MOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx8 ptr idx val mem)
(MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil => (MOVWstoreidx4 ptr idx val mem)
(MOVHstore [off] {sym} (ADDshiftLL [1] ptr idx) val mem) && off == 0 && sym == nil => (MOVHstoreidx2 ptr idx val mem)
(MOVDstoreidx ptr (SLLconst [3] idx) val mem) => (MOVDstoreidx8 ptr idx val mem)
(MOVWstoreidx ptr (SLLconst [2] idx) val mem) => (MOVWstoreidx4 ptr idx val mem)
(MOVHstoreidx ptr (SLLconst [1] idx) val mem) => (MOVHstoreidx2 ptr idx val mem)
(MOVHstoreidx ptr (ADD idx idx) val mem) => (MOVHstoreidx2 ptr idx val mem)
(MOVDstoreidx (SLLconst [3] idx) ptr val mem) => (MOVDstoreidx8 ptr idx val mem)
(MOVWstoreidx (SLLconst [2] idx) ptr val mem) => (MOVWstoreidx4 ptr idx val mem)
(MOVHstoreidx (SLLconst [1] idx) ptr val mem) => (MOVHstoreidx2 ptr idx val mem)
(MOVHstoreidx (ADD idx idx) ptr val mem) => (MOVHstoreidx2 ptr idx val mem)
(MOVDstoreidx8 ptr (MOVDconst [c]) val mem) && is32Bit(c<<3) => (MOVDstore [int32(c)<<3] ptr val mem)
(MOVWstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (MOVWstore [int32(c)<<2] ptr val mem)
(MOVHstoreidx2 ptr (MOVDconst [c]) val mem) && is32Bit(c<<1) => (MOVHstore [int32(c)<<1] ptr val mem)
(MOVDstorezeroidx ptr (SLLconst [3] idx) mem) => (MOVDstorezeroidx8 ptr idx mem)
(MOVWstorezeroidx ptr (SLLconst [2] idx) mem) => (MOVWstorezeroidx4 ptr idx mem)
(MOVHstorezeroidx ptr (SLLconst [1] idx) mem) => (MOVHstorezeroidx2 ptr idx mem)
(MOVHstorezeroidx ptr (ADD idx idx) mem) => (MOVHstorezeroidx2 ptr idx mem)
(MOVDstorezeroidx (SLLconst [3] idx) ptr mem) => (MOVDstorezeroidx8 ptr idx mem)
(MOVWstorezeroidx (SLLconst [2] idx) ptr mem) => (MOVWstorezeroidx4 ptr idx mem)
(MOVHstorezeroidx (SLLconst [1] idx) ptr mem) => (MOVHstorezeroidx2 ptr idx mem)
(MOVHstorezeroidx (ADD idx idx) ptr mem) => (MOVHstorezeroidx2 ptr idx mem)
(MOVDstoreidx8 ptr idx (MOVDconst [0]) mem) => (MOVDstorezeroidx8 ptr idx mem)
(MOVWstoreidx4 ptr idx (MOVDconst [0]) mem) => (MOVWstorezeroidx4 ptr idx mem)
(MOVHstoreidx2 ptr idx (MOVDconst [0]) mem) => (MOVHstorezeroidx2 ptr idx mem)
// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
// these seem to have bad interaction with other rules, resulting in slower code
//(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBreg x)
//(MOVBUload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBUreg x)
//(MOVHload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHreg x)
//(MOVHUload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHUreg x)
//(MOVWload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWreg x)
//(MOVWUload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWUreg x)
//(MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
-//(LDP [off] {sym} ptr (STP [off2] {sym2} ptr2 x y _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x y
//(FMOVSload [off] {sym} ptr (FMOVSstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
//(FMOVDload [off] {sym} ptr (FMOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x
+//(LDP [off] {sym} ptr (STP [off2] {sym2} ptr2 x y _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> x y
(MOVBload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVBUload [off] {sym} ptr (MOVBstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVHload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVHUload [off] {sym} ptr (MOVHstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVWload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVDconst [0])
(MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
(MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _))
&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) => (MOVDconst [0])
(MOVHloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
(MOVHUloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
(MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
(MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
(MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) => (MOVDconst [0])
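// Example (illustrative only; names assumed): a load that observes a
// preceding zeroing store of the same location folds to a constant:
//
//	func reload(p *uint64) uint64 {
//		*p = 0
//		return *p // MOVDload of MOVDstorezero becomes MOVDconst [0]
//	}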
// don't extend after proper load
(MOVBreg x:(MOVBload _ _)) => (MOVDreg x)
(MOVBUreg x:(MOVBUload _ _)) => (MOVDreg x)
(MOVHreg x:(MOVBload _ _)) => (MOVDreg x)
(MOVHreg x:(MOVBUload _ _)) => (MOVDreg x)
(MOVHreg x:(MOVHload _ _)) => (MOVDreg x)
(MOVHUreg x:(MOVBUload _ _)) => (MOVDreg x)
(MOVHUreg x:(MOVHUload _ _)) => (MOVDreg x)
(MOVWreg x:(MOVBload _ _)) => (MOVDreg x)
(MOVWreg x:(MOVBUload _ _)) => (MOVDreg x)
(MOVWreg x:(MOVHload _ _)) => (MOVDreg x)
(MOVWreg x:(MOVHUload _ _)) => (MOVDreg x)
(MOVWreg x:(MOVWload _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVBUload _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVHUload _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVWUload _ _)) => (MOVDreg x)
(MOVBreg x:(MOVBloadidx _ _ _)) => (MOVDreg x)
(MOVBUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
(MOVHreg x:(MOVBloadidx _ _ _)) => (MOVDreg x)
(MOVHreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
(MOVHreg x:(MOVHloadidx _ _ _)) => (MOVDreg x)
(MOVHUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
(MOVHUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
(MOVWreg x:(MOVBloadidx _ _ _)) => (MOVDreg x)
(MOVWreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
(MOVWreg x:(MOVHloadidx _ _ _)) => (MOVDreg x)
(MOVWreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
(MOVWreg x:(MOVWloadidx _ _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVBUloadidx _ _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVHUloadidx _ _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVWUloadidx _ _ _)) => (MOVDreg x)
(MOVHreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x)
(MOVHUreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
(MOVWreg x:(MOVHloadidx2 _ _ _)) => (MOVDreg x)
(MOVWreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
(MOVWreg x:(MOVWloadidx4 _ _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVHUloadidx2 _ _ _)) => (MOVDreg x)
(MOVWUreg x:(MOVWUloadidx4 _ _ _)) => (MOVDreg x)
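// Example (illustrative only; names assumed): the widening conversion below is
// free because MOVBload already sign-extends to 64 bits:
//
//	func widen(p *int8) int64 {
//		return int64(*p) // MOVBreg of MOVBload is rewritten to a plain MOVDreg
//	}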
// fold double extensions
(MOVBreg x:(MOVBreg _)) => (MOVDreg x)
(MOVBUreg x:(MOVBUreg _)) => (MOVDreg x)
(MOVHreg x:(MOVBreg _)) => (MOVDreg x)
(MOVHreg x:(MOVBUreg _)) => (MOVDreg x)
(MOVHreg x:(MOVHreg _)) => (MOVDreg x)
(MOVHUreg x:(MOVBUreg _)) => (MOVDreg x)
(MOVHUreg x:(MOVHUreg _)) => (MOVDreg x)
(MOVWreg x:(MOVBreg _)) => (MOVDreg x)
(MOVWreg x:(MOVBUreg _)) => (MOVDreg x)
(MOVWreg x:(MOVHreg _)) => (MOVDreg x)
(MOVWreg x:(MOVWreg _)) => (MOVDreg x)
(MOVWUreg x:(MOVBUreg _)) => (MOVDreg x)
(MOVWUreg x:(MOVHUreg _)) => (MOVDreg x)
(MOVWUreg x:(MOVWUreg _)) => (MOVDreg x)
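// Example (illustrative only; names assumed): chained sign extensions collapse
// because the inner one already established the sign bits:
//
//	func chain(x int8) int64 {
//		return int64(int16(x)) // MOVHreg of MOVBreg: the outer extension is redundant
//	}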
// don't extend before store
(MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVBstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
(MOVHstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
(MOVBstoreidx ptr idx (MOVBreg x) mem) => (MOVBstoreidx ptr idx x mem)
(MOVBstoreidx ptr idx (MOVBUreg x) mem) => (MOVBstoreidx ptr idx x mem)
(MOVBstoreidx ptr idx (MOVHreg x) mem) => (MOVBstoreidx ptr idx x mem)
(MOVBstoreidx ptr idx (MOVHUreg x) mem) => (MOVBstoreidx ptr idx x mem)
(MOVBstoreidx ptr idx (MOVWreg x) mem) => (MOVBstoreidx ptr idx x mem)
(MOVBstoreidx ptr idx (MOVWUreg x) mem) => (MOVBstoreidx ptr idx x mem)
(MOVHstoreidx ptr idx (MOVHreg x) mem) => (MOVHstoreidx ptr idx x mem)
(MOVHstoreidx ptr idx (MOVHUreg x) mem) => (MOVHstoreidx ptr idx x mem)
(MOVHstoreidx ptr idx (MOVWreg x) mem) => (MOVHstoreidx ptr idx x mem)
(MOVHstoreidx ptr idx (MOVWUreg x) mem) => (MOVHstoreidx ptr idx x mem)
(MOVWstoreidx ptr idx (MOVWreg x) mem) => (MOVWstoreidx ptr idx x mem)
(MOVWstoreidx ptr idx (MOVWUreg x) mem) => (MOVWstoreidx ptr idx x mem)
(MOVHstoreidx2 ptr idx (MOVHreg x) mem) => (MOVHstoreidx2 ptr idx x mem)
(MOVHstoreidx2 ptr idx (MOVHUreg x) mem) => (MOVHstoreidx2 ptr idx x mem)
(MOVHstoreidx2 ptr idx (MOVWreg x) mem) => (MOVHstoreidx2 ptr idx x mem)
(MOVHstoreidx2 ptr idx (MOVWUreg x) mem) => (MOVHstoreidx2 ptr idx x mem)
(MOVWstoreidx4 ptr idx (MOVWreg x) mem) => (MOVWstoreidx4 ptr idx x mem)
(MOVWstoreidx4 ptr idx (MOVWUreg x) mem) => (MOVWstoreidx4 ptr idx x mem)
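// Example (illustrative only; names assumed): an extension feeding a narrow
// store is dead, since the store reads only the low bits:
//
//	func put(p *byte, v int8) {
//		*p = byte(int32(v)) // the MOVBreg from int32(v) is dropped before MOVBstore
//	}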
// if a register move has only 1 use, just use the same register without emitting an instruction
(MOVDnop (MOVDconst [c])) => (MOVDconst [c])
// fold constant into arithmetic ops
(ADD x (MOVDconst <t> [c])) && !t.IsPtr() => (ADDconst [c] x)
(SUB x (MOVDconst [c])) => (SUBconst [c] x)
(AND x (MOVDconst [c])) => (ANDconst [c] x)
(OR x (MOVDconst [c])) => (ORconst [c] x)
(XOR x (MOVDconst [c])) => (XORconst [c] x)
(TST x (MOVDconst [c])) => (TSTconst [c] x)
(TSTW x (MOVDconst [c])) => (TSTWconst [int32(c)] x)
(CMN x (MOVDconst [c])) => (CMNconst [c] x)
(CMNW x (MOVDconst [c])) => (CMNWconst [int32(c)] x)
(BIC x (MOVDconst [c])) => (ANDconst [^c] x)
(EON x (MOVDconst [c])) => (XORconst [^c] x)
(ORN x (MOVDconst [c])) => (ORconst [^c] x)
(SLL x (MOVDconst [c])) => (SLLconst x [c&63])
(SRL x (MOVDconst [c])) => (SRLconst x [c&63])
(SRL x (ANDconst [63] y)) => (SRL x y)
(SRA x (ANDconst [63] y)) => (SRA x y)
(CMP x (MOVDconst [c])) => (CMPconst [c] x)
(CMP (MOVDconst [c]) x) => (InvertFlags (CMPconst [c] x))
(CMPW x (MOVDconst [c])) => (CMPWconst [int32(c)] x)
(CMPW (MOVDconst [c]) x) => (InvertFlags (CMPWconst [int32(c)] x))
(ROR x (MOVDconst [c])) => (RORconst x [c&63])
(RORW x (MOVDconst [c])) => (RORWconst x [c&31])
(ADDSflags x (MOVDconst [c])) => (ADDSconstflags [c] x)
((CMP|CMPW) x y) && canonLessThan(x,y) => (InvertFlags ((CMP|CMPW) y x))
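// Example (illustrative only; names assumed): a constant operand moves into
// the instruction's immediate field:
//
//	func addc(x int64) int64 {
//		return x + 42 // ADD x (MOVDconst [42]) becomes ADDconst [42] x
//	}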
// mul-neg => mneg
(NEG (MUL x y)) => (MNEG x y)
(NEG (MULW x y)) => (MNEGW x y)
(MUL (NEG x) y) => (MNEG x y)
(MULW (NEG x) y) => (MNEGW x y)
// madd/msub
(ADD a l:(MUL x y)) && l.Uses==1 && clobber(l) => (MADD a x y)
(MSUBW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log64(c/9)])
// div by constant
(UDIV x (MOVDconst [1])) => x
(UDIV x (MOVDconst [c])) && isPowerOfTwo64(c) => (SRLconst [log64(c)] x)
(UDIVW x (MOVDconst [c])) && uint32(c)==1 => x
(UDIVW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (SRLconst [log64(c)] x)
(UMOD _ (MOVDconst [1])) => (MOVDconst [0])
(UMOD x (MOVDconst [c])) && isPowerOfTwo64(c) => (ANDconst [c-1] x)
(UMODW _ (MOVDconst [c])) && uint32(c)==1 => (MOVDconst [0])
(UMODW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (ANDconst [c-1] x)
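// Example (illustrative only; names assumed): power-of-two divisors become a
// shift and a mask instead of UDIV/UMOD:
//
//	func divmod(x uint64) (uint64, uint64) {
//		return x / 8, x % 8 // SRLconst [3] x and ANDconst [7] x
//	}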
(CMNWconst (MOVDconst [x]) [y]) => (FlagConstant [addFlags32(int32(x),y)])
// other known comparisons
(CMPconst (MOVBUreg _) [c]) && 0xff < c => (FlagConstant [subFlags64(0,1)])
(CMPconst (MOVHUreg _) [c]) && 0xffff < c => (FlagConstant [subFlags64(0,1)])
(CMPconst (MOVWUreg _) [c]) && 0xffffffff < c => (FlagConstant [subFlags64(0,1)])
(CMPconst (ANDconst _ [m]) [n]) && 0 <= m && m < n => (FlagConstant [subFlags64(0,1)])
(CMPconst (SRLconst _ [c]) [n]) && 0 <= n && 0 < c && c <= 63 && (1<<uint64(64-c)) <= uint64(n) => (FlagConstant [subFlags64(0,1)])
(CMPWconst (MOVBUreg _) [c]) && 0xff < c => (FlagConstant [subFlags64(0,1)])
(CMPWconst (MOVHUreg _) [c]) && 0xffff < c => (FlagConstant [subFlags64(0,1)])
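// Example (illustrative only; names assumed): a zero-extended byte can never
// exceed 0xff, so the comparison below folds to a flag constant and the
// branch is resolved statically:
//
//	func guard(b byte) int {
//		if uint64(b) > 0x100 { // CMPconst (MOVBUreg _) [0x100]: always "less than"
//			return 1
//		}
//		return 0
//	}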
// absorb flag constants into branches
(GEnoov (FlagConstant [fc]) yes no) && fc.geNoov() => (First yes no)
(GEnoov (FlagConstant [fc]) yes no) && !fc.geNoov() => (First no yes)
(Z (MOVDconst [0]) yes no) => (First yes no)
(Z (MOVDconst [c]) yes no) && c != 0 => (First no yes)
(NZ (MOVDconst [0]) yes no) => (First no yes)
(NZ (MOVDconst [c]) yes no) && c != 0 => (First yes no)
(ZW (MOVDconst [c]) yes no) && int32(c) == 0 => (First yes no)
(ZW (MOVDconst [c]) yes no) && int32(c) != 0 => (First no yes)
(NZW (MOVDconst [c]) yes no) && int32(c) == 0 => (First no yes)
(NZW (MOVDconst [c]) yes no) && int32(c) != 0 => (First yes no)
// absorb InvertFlags into branches
(LT (InvertFlags cmp) yes no) => (GT cmp yes no)
(GT (InvertFlags cmp) yes no) => (LT cmp yes no)
(LE (InvertFlags cmp) yes no) => (GE cmp yes no)
(GE (InvertFlags cmp) yes no) => (LE cmp yes no)
(ULT (InvertFlags cmp) yes no) => (UGT cmp yes no)
(UGT (InvertFlags cmp) yes no) => (ULT cmp yes no)
(ULE (InvertFlags cmp) yes no) => (UGE cmp yes no)
(UGE (InvertFlags cmp) yes no) => (ULE cmp yes no)
(EQ (InvertFlags cmp) yes no) => (EQ cmp yes no)
(NE (InvertFlags cmp) yes no) => (NE cmp yes no)
(FLT (InvertFlags cmp) yes no) => (FGT cmp yes no)
(FGT (InvertFlags cmp) yes no) => (FLT cmp yes no)
(FLE (InvertFlags cmp) yes no) => (FGE cmp yes no)
(GTnoov (InvertFlags cmp) yes no) => (LTnoov cmp yes no)
// absorb InvertFlags into conditional instructions
(CSEL [cc] x y (InvertFlags cmp)) => (CSEL [arm64Invert(cc)] x y cmp)
(CSEL0 [cc] x (InvertFlags cmp)) => (CSEL0 [arm64Invert(cc)] x cmp)
(CSETM [cc] (InvertFlags cmp)) => (CSETM [arm64Invert(cc)] cmp)
(CSINC [cc] x y (InvertFlags cmp)) => (CSINC [arm64Invert(cc)] x y cmp)
(CSINV [cc] x y (InvertFlags cmp)) => (CSINV [arm64Invert(cc)] x y cmp)
(CSNEG [cc] x y (InvertFlags cmp)) => (CSNEG [arm64Invert(cc)] x y cmp)
// absorb flag constants into boolean values
(Equal (FlagConstant [fc])) => (MOVDconst [b2i(fc.eq())])
(NotEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.ne())])
(LessThan (FlagConstant [fc])) => (MOVDconst [b2i(fc.lt())])
(LessThanU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ult())])
(LessEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.le())])
(LessEqualU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ule())])
(GreaterThan (FlagConstant [fc])) => (MOVDconst [b2i(fc.gt())])
(GreaterThanU (FlagConstant [fc])) => (MOVDconst [b2i(fc.ugt())])
(GreaterEqual (FlagConstant [fc])) => (MOVDconst [b2i(fc.ge())])
(GreaterEqualU (FlagConstant [fc])) => (MOVDconst [b2i(fc.uge())])
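// Example (illustrative only; names assumed): the same folding applies when
// the comparison is materialized as a boolean instead of branched on:
//
//	func always(b byte) bool {
//		return uint64(b) < 0x100 // LessThanU of a FlagConstant folds to MOVDconst [1]
//	}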
// absorb InvertFlags into boolean values
(Equal (InvertFlags x)) => (Equal x)
(NotEqual (InvertFlags x)) => (NotEqual x)
(LessThan (InvertFlags x)) => (GreaterThan x)
(LessThanU (InvertFlags x)) => (GreaterThanU x)
(GreaterThan (InvertFlags x)) => (LessThan x)
(GreaterThanU (InvertFlags x)) => (LessThanU x)
(LessEqual (InvertFlags x)) => (GreaterEqual x)
(LessEqualU (InvertFlags x)) => (GreaterEqualU x)
(GreaterEqual (InvertFlags x)) => (LessEqual x)
(GreaterEqualU (InvertFlags x)) => (LessEqualU x)
(LessThanF (InvertFlags x)) => (GreaterThanF x)
(LessEqualF (InvertFlags x)) => (GreaterEqualF x)
(GreaterThanF (InvertFlags x)) => (LessThanF x)
(GreaterEqualF (InvertFlags x)) => (LessEqualF x)
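// Example (illustrative only; names assumed): a constant left operand may be
// canonicalized through InvertFlags, flipping the boolean op to match:
//
//	func gt(x int64) bool {
//		return 10 < x // CMP (MOVDconst [10]) x -> InvertFlags; LessThan -> GreaterThan
//	}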
// Boolean-generating instructions (NOTE: NOT all boolean Values) always
// zero upper bit of the register; no need to zero-extend the argument to
// these instructions.
(MOVBUreg x:((Equal|NotEqual|LessThan|LessThanU|LessThanF|LessEqual|LessEqualU|LessEqualF|GreaterThan|GreaterThanU|GreaterThanF|GreaterEqual|GreaterEqualU|GreaterEqualF) _)) => (MOVDreg x)
// omit unsigned extension
-
(MOVWUreg x) && zeroUpper32Bits(x, 3) => x
// omit sign extension
-
(MOVWreg <t> (ANDconst x [c])) && uint64(c) & uint64(0xffffffff80000000) == 0 => (ANDconst <t> x [c])
(MOVHreg <t> (ANDconst x [c])) && uint64(c) & uint64(0xffffffffffff8000) == 0 => (ANDconst <t> x [c])
(MOVBreg <t> (ANDconst x [c])) && uint64(c) & uint64(0xffffffffffffff80) == 0 => (ANDconst <t> x [c])
// absorb flag constants into conditional instructions
(CSEL [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
(CSEL [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => y
(CSEL0 [cc] x flag) && ccARM64Eval(cc, flag) > 0 => x
(CSEL0 [cc] _ flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0])
(CSNEG [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
(CSNEG [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (NEG y)
(CSINV [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
(CSINV [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (Not y)
(CSINC [cc] x _ flag) && ccARM64Eval(cc, flag) > 0 => x
(CSINC [cc] _ y flag) && ccARM64Eval(cc, flag) < 0 => (ADDconst [1] y)
(CSETM [cc] flag) && ccARM64Eval(cc, flag) > 0 => (MOVDconst [-1])
(CSETM [cc] flag) && ccARM64Eval(cc, flag) < 0 => (MOVDconst [0])
// absorb flags back into boolean CSEL
(CSEL [cc] x y (CMPWconst [0] boolval)) && cc == OpARM64NotEqual && flagArg(boolval) != nil =>
(CSEL [boolval.Op] x y flagArg(boolval))
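// Example (illustrative only; names assumed): selecting on a materialized
// boolean re-uses the original comparison's flags rather than testing the
// boolean against zero:
//
//	func pick(a, b, x, y int64) int64 {
//		c := a < b
//		r := y
//		if c {
//			r = x // a CSEL testing c against zero selects on a < b directly
//		}
//		return r
//	}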
// int64(x << lc)
(MOVWreg (SLLconst [lc] x)) && lc < 32 => (SBFIZ [armBFAuxInt(lc, 32-lc)] x)
(MOVHreg (SLLconst [lc] x)) && lc < 16 => (SBFIZ [armBFAuxInt(lc, 16-lc)] x)
(MOVBreg (SLLconst [lc] x)) && lc < 8 => (SBFIZ [armBFAuxInt(lc, 8-lc)] x)
// int64(x) << lc
(SLLconst [lc] (MOVWreg x)) => (SBFIZ [armBFAuxInt(lc, min(32, 64-lc))] x)
(SLLconst [lc] (MOVHreg x)) => (SBFIZ [armBFAuxInt(lc, min(16, 64-lc))] x)
(SLLconst [lc] (MOVBreg x)) => (SBFIZ [armBFAuxInt(lc, min(8, 64-lc))] x)
// sbfx
// (x << lc) >> rc
(SRAconst [rc] (SLLconst [lc] x)) && lc <= rc => (SBFX [armBFAuxInt(rc-lc, 64-rc)] x)
// int64(x) >> rc
(SRAconst [rc] (MOVWreg x)) && rc < 32 => (SBFX [armBFAuxInt(rc, 32-rc)] x)
(SRAconst [rc] (MOVHreg x)) && rc < 16 => (SBFX [armBFAuxInt(rc, 16-rc)] x)
(SRAconst [rc] (MOVBreg x)) && rc < 8 => (SBFX [armBFAuxInt(rc, 8-rc)] x)
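// Example (illustrative only; names assumed): an arithmetic shift of a
// sign-extended value is a single signed bitfield extract:
//
//	func hi16(x int32) int64 {
//		return int64(x) >> 16 // SBFX [armBFAuxInt(16, 16)] x
//	}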
// merge sbfx and sign-extension into sbfx
(MOVWreg (SBFX [bfc] x)) && bfc.getARM64BFwidth() <= 32 => (SBFX [bfc] x)
(MOVHreg (SBFX [bfc] x)) && bfc.getARM64BFwidth() <= 16 => (SBFX [bfc] x)
// uint64(x) << lc
(SLLconst [lc] (MOVWUreg x)) => (UBFIZ [armBFAuxInt(lc, min(32, 64-lc))] x)
(SLLconst [lc] (MOVHUreg x)) => (UBFIZ [armBFAuxInt(lc, min(16, 64-lc))] x)
(SLLconst [lc] (MOVBUreg x)) => (UBFIZ [armBFAuxInt(lc, min(8, 64-lc))] x)
// uint64(x << lc)
(MOVWUreg (SLLconst [lc] x)) && lc < 32 => (UBFIZ [armBFAuxInt(lc, 32-lc)] x)
(MOVHUreg (SLLconst [lc] x)) && lc < 16 => (UBFIZ [armBFAuxInt(lc, 16-lc)] x)
(MOVBUreg (SLLconst [lc] x)) && lc < 8 => (UBFIZ [armBFAuxInt(lc, 8-lc)] x)
// merge ANDconst into ubfiz
// (x & ac) << sc
(SLLconst [sc] (ANDconst [ac] x)) && isARM64BFMask(sc, ac, 0) => (UBFIZ [armBFAuxInt(sc, arm64BFWidth(ac, 0))] x)
// uint64(x) >> rc
(SRLconst [rc] (MOVWUreg x)) && rc < 32 => (UBFX [armBFAuxInt(rc, 32-rc)] x)
(SRLconst [rc] (MOVHUreg x)) && rc < 16 => (UBFX [armBFAuxInt(rc, 16-rc)] x)
(SRLconst [rc] (MOVBUreg x)) && rc < 8 => (UBFX [armBFAuxInt(rc, 8-rc)] x)
// uint64(x >> rc)
(MOVWUreg (SRLconst [rc] x)) && rc < 32 => (UBFX [armBFAuxInt(rc, 32)] x)
(MOVHUreg (SRLconst [rc] x)) && rc < 16 => (UBFX [armBFAuxInt(rc, 16)] x)
(MOVBUreg (SRLconst [rc] x)) && rc < 8 => (UBFX [armBFAuxInt(rc, 8)] x)
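// Example (illustrative only; names assumed): a logical shift of a
// zero-extended value is a single unsigned bitfield extract:
//
//	func nibbles(x uint32) uint64 {
//		return uint64(x) >> 4 // UBFX [armBFAuxInt(4, 28)] x
//	}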
// merge ANDconst into ubfx
// (x >> sc) & ac
(ANDconst [ac] (SRLconst [sc] x)) && isARM64BFMask(sc, ac, 0) => (UBFX [armBFAuxInt(sc, arm64BFWidth(ac, 0))] x)
(MOVBstore [i] {s} ptr w
x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w)
x3:(MOVBstore [i-4] {s} ptr (SRLconst [32] w)
&& clobber(x0, x1, x2, x3, x4, x5, x6)
=> (MOVDstore [i-7] {s} ptr (REV <w.Type> w) mem)
(MOVBstore [7] {s} p w
x0:(MOVBstore [6] {s} p (SRLconst [8] w)
x1:(MOVBstore [5] {s} p (SRLconst [16] w)
x2:(MOVBstore [4] {s} p (SRLconst [24] w)
x3:(MOVBstore [3] {s} p (SRLconst [32] w)
&& clobber(x0, x1, x2, x3, x4, x5, x6)
=> (MOVDstoreidx ptr0 idx0 (REV <w.Type> w) mem)
(MOVBstore [i] {s} ptr w
x0:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w)
x1:(MOVBstore [i-2] {s} ptr (UBFX [armBFAuxInt(16, 16)] w)
x2:(MOVBstore [i-3] {s} ptr (UBFX [armBFAuxInt(24, 8)] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& clobber(x0, x1, x2)
=> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
(MOVBstoreidx ptr (ADDconst [3] idx) w
x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(8, 24)] w)
x1:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(16, 16)] w)
x2:(MOVBstoreidx ptr idx (UBFX [armBFAuxInt(24, 8)] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& clobber(x0, x1, x2)
=> (MOVWstoreidx ptr idx (REVW <w.Type> w) mem)
(MOVBstoreidx ptr idx w
x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 24)] w)
x1:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(16, 16)] w)
x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [armBFAuxInt(24, 8)] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& clobber(x0, x1, x2)
=> (MOVWstoreidx ptr idx w mem)
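// Example (illustrative only; names assumed): the classic target of these
// merging rules is manual byte-at-a-time encoding:
//
//	func put32(b []byte, w uint32) {
//		b[0] = byte(w)
//		b[1] = byte(w >> 8)
//		b[2] = byte(w >> 16)
//		b[3] = byte(w >> 24) // the four byte stores merge into one 32-bit store
//	}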
(MOVBstore [i] {s} ptr w
x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w))
x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w))
x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVDreg w)) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& clobber(x0, x1, x2)
=> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
(MOVBstore [i] {s} ptr w
x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& clobber(x0, x1, x2)
=> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
// FP simplification
(FNEGS (FMULS x y)) => (FNMULS x y)
(FNEGD (FMULD x y)) => (FNMULD x y)
(FMULS (FNEGS x) y) => (FNMULS x y)
(FMULD (FNEGD x) y) => (FNMULD x y)
(FNEGS (FNMULS x y)) => (FMULS x y)
(FNEGD (FNMULD x y)) => (FMULD x y)
(FNMULS (FNEGS x) y) => (FMULS x y)
(FNMULD (FNEGD x) y) => (FMULD x y)

(FADDS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y)
(FADDD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y)
(FSUBS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y)
(FSUBD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y)
(FSUBS (FMULS x y) a) && a.Block.Func.useFMA(v) => (FNMSUBS a x y)
(FSUBD (FMULD x y) a) && a.Block.Func.useFMA(v) => (FNMSUBD a x y)
(FADDS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y)
(FADDD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y)
(FSUBS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y)
(FSUBD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y)
(FSUBS (FNMULS x y) a) && a.Block.Func.useFMA(v) => (FNMADDS a x y)
(FSUBD (FNMULD x y) a) && a.Block.Func.useFMA(v) => (FNMADDD a x y)
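// Example (illustrative only; names assumed): when useFMA allows fusion, a
// floating-point multiply-add expression compiles to a single instruction:
//
//	func fma(a, x, y float64) float64 {
//		return a + x*y // FADDD of FMULD becomes FMADDD
//	}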