From c53d1236dfaa900387519810c8de1a9353c55bab Mon Sep 17 00:00:00 2001 From: Alberto Donizetti Date: Fri, 24 Apr 2020 17:24:16 +0200 Subject: [PATCH] cmd/compile: use typed aux for first half of arm64 lowering Passes GOARCH=arm64 gotip build -toolexec 'toolstash -cmp' -a std Change-Id: Icb530d8d128d9938ab44a9c716c8dd09a34ededf Reviewed-on: https://go-review.googlesource.com/c/go/+/229937 Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/gen/ARM64.rules | 374 +++++++++---------- src/cmd/compile/internal/ssa/rewriteARM64.go | 324 +++++++++++----- 2 files changed, 417 insertions(+), 281 deletions(-) diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules index 8478c1c678..9fec8c5526 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules @@ -2,130 +2,130 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -(Add(Ptr|64|32|16|8) ...) -> (ADD ...) -(Add(32F|64F) ...) -> (FADD(S|D) ...) - -(Sub(Ptr|64|32|16|8) ...) -> (SUB ...) -(Sub(32F|64F) ...) -> (FSUB(S|D) ...) - -(Mul64 ...) -> (MUL ...) -(Mul(32|16|8) ...) -> (MULW ...) -(Mul(32F|64F) ...) -> (FMUL(S|D) ...) - -(Hmul64 ...) -> (MULH ...) -(Hmul64u ...) -> (UMULH ...) -(Hmul32 x y) -> (SRAconst (MULL x y) [32]) -(Hmul32u x y) -> (SRAconst (UMULL x y) [32]) -(Mul64uhilo ...) -> (LoweredMuluhilo ...) - -(Div64 ...) -> (DIV ...) -(Div64u ...) -> (UDIV ...) -(Div32 ...) -> (DIVW ...) -(Div32u ...) -> (UDIVW ...) -(Div16 x y) -> (DIVW (SignExt16to32 x) (SignExt16to32 y)) -(Div16u x y) -> (UDIVW (ZeroExt16to32 x) (ZeroExt16to32 y)) -(Div8 x y) -> (DIVW (SignExt8to32 x) (SignExt8to32 y)) -(Div8u x y) -> (UDIVW (ZeroExt8to32 x) (ZeroExt8to32 y)) -(Div32F ...) -> (FDIVS ...) -(Div64F ...) -> (FDIVD ...) - -(Mod64 ...) -> (MOD ...) -(Mod64u ...) -> (UMOD ...) -(Mod32 ...) -> (MODW ...) -(Mod32u ...) -> (UMODW ...) -(Mod16 x y) -> (MODW (SignExt16to32 x) (SignExt16to32 y)) -(Mod16u x y) -> (UMODW (ZeroExt16to32 x) (ZeroExt16to32 y)) -(Mod8 x y) -> (MODW (SignExt8to32 x) (SignExt8to32 y)) -(Mod8u x y) -> (UMODW (ZeroExt8to32 x) (ZeroExt8to32 y)) - -// (x + y) / 2 with x>=y -> (x - y) / 2 + y -(Avg64u x y) -> (ADD (SRLconst (SUB x y) [1]) y) - -(And(64|32|16|8) ...) -> (AND ...) -(Or(64|32|16|8) ...) -> (OR ...) -(Xor(64|32|16|8) ...) -> (XOR ...) +(Add(Ptr|64|32|16|8) ...) => (ADD ...) +(Add(32F|64F) ...) => (FADD(S|D) ...) + +(Sub(Ptr|64|32|16|8) ...) => (SUB ...) +(Sub(32F|64F) ...) => (FSUB(S|D) ...) + +(Mul64 ...) => (MUL ...) +(Mul(32|16|8) ...) => (MULW ...) +(Mul(32F|64F) ...) => (FMUL(S|D) ...) + +(Hmul64 ...) => (MULH ...) +(Hmul64u ...) => (UMULH ...) +(Hmul32 x y) => (SRAconst (MULL x y) [32]) +(Hmul32u x y) => (SRAconst (UMULL x y) [32]) +(Mul64uhilo ...) => (LoweredMuluhilo ...) + +(Div64 [false] x y) => (DIV x y) +(Div64u ...) => (UDIV ...) +(Div32 [false] x y) => (DIVW x y) +(Div32u ...) => (UDIVW ...) +(Div16 [false] x y) => (DIVW (SignExt16to32 x) (SignExt16to32 y)) +(Div16u x y) => (UDIVW (ZeroExt16to32 x) (ZeroExt16to32 y)) +(Div8 x y) => (DIVW (SignExt8to32 x) (SignExt8to32 y)) +(Div8u x y) => (UDIVW (ZeroExt8to32 x) (ZeroExt8to32 y)) +(Div32F ...) => (FDIVS ...) +(Div64F ...) => (FDIVD ...) + +(Mod64 x y) => (MOD x y) +(Mod64u ...) => (UMOD ...) +(Mod32 x y) => (MODW x y) +(Mod32u ...) => (UMODW ...) 
+(Mod16 x y) => (MODW (SignExt16to32 x) (SignExt16to32 y)) +(Mod16u x y) => (UMODW (ZeroExt16to32 x) (ZeroExt16to32 y)) +(Mod8 x y) => (MODW (SignExt8to32 x) (SignExt8to32 y)) +(Mod8u x y) => (UMODW (ZeroExt8to32 x) (ZeroExt8to32 y)) + +// (x + y) / 2 with x>=y => (x - y) / 2 + y +(Avg64u x y) => (ADD (SRLconst (SUB x y) [1]) y) + +(And(64|32|16|8) ...) => (AND ...) +(Or(64|32|16|8) ...) => (OR ...) +(Xor(64|32|16|8) ...) => (XOR ...) // unary ops -(Neg(64|32|16|8) ...) -> (NEG ...) -(Neg(32F|64F) ...) -> (FNEG(S|D) ...) -(Com(64|32|16|8) ...) -> (MVN ...) +(Neg(64|32|16|8) ...) => (NEG ...) +(Neg(32F|64F) ...) => (FNEG(S|D) ...) +(Com(64|32|16|8) ...) => (MVN ...) // math package intrinsics -(Abs ...) -> (FABSD ...) -(Sqrt ...) -> (FSQRTD ...) -(Ceil ...) -> (FRINTPD ...) -(Floor ...) -> (FRINTMD ...) -(Round ...) -> (FRINTAD ...) -(RoundToEven ...) -> (FRINTND ...) -(Trunc ...) -> (FRINTZD ...) -(FMA x y z) -> (FMADDD z x y) +(Abs ...) => (FABSD ...) +(Sqrt ...) => (FSQRTD ...) +(Ceil ...) => (FRINTPD ...) +(Floor ...) => (FRINTMD ...) +(Round ...) => (FRINTAD ...) +(RoundToEven ...) => (FRINTND ...) +(Trunc ...) => (FRINTZD ...) +(FMA x y z) => (FMADDD z x y) // lowering rotates -(RotateLeft8 x (MOVDconst [c])) -> (Or8 (Lsh8x64 x (MOVDconst [c&7])) (Rsh8Ux64 x (MOVDconst [-c&7]))) -(RotateLeft16 x (MOVDconst [c])) -> (Or16 (Lsh16x64 x (MOVDconst [c&15])) (Rsh16Ux64 x (MOVDconst [-c&15]))) -(RotateLeft32 x y) -> (RORW x (NEG y)) -(RotateLeft64 x y) -> (ROR x (NEG y)) +(RotateLeft8 x (MOVDconst [c])) => (Or8 (Lsh8x64 x (MOVDconst [c&7])) (Rsh8Ux64 x (MOVDconst [-c&7]))) +(RotateLeft16 x (MOVDconst [c])) => (Or16 (Lsh16x64 x (MOVDconst [c&15])) (Rsh16Ux64 x (MOVDconst [-c&15]))) +(RotateLeft32 x y) => (RORW x (NEG y)) +(RotateLeft64 x y) => (ROR x (NEG y)) -(Ctz(64|32|16|8)NonZero ...) -> (Ctz(64|32|32|32) ...) +(Ctz(64|32|16|8)NonZero ...) => (Ctz(64|32|32|32) ...) -(Ctz64 x) -> (CLZ (RBIT x)) -(Ctz32 x) -> (CLZW (RBITW x)) -(Ctz16 x) -> (CLZW (RBITW (ORconst [0x10000] x))) -(Ctz8 x) -> (CLZW (RBITW (ORconst [0x100] x))) +(Ctz64 x) => (CLZ (RBIT x)) +(Ctz32 x) => (CLZW (RBITW x)) +(Ctz16 x) => (CLZW (RBITW (ORconst [0x10000] x))) +(Ctz8 x) => (CLZW (RBITW (ORconst [0x100] x))) -(PopCount64 x) -> (FMOVDfpgp (VUADDLV (VCNT (FMOVDgpfp x)))) -(PopCount32 x) -> (FMOVDfpgp (VUADDLV (VCNT (FMOVDgpfp (ZeroExt32to64 x))))) -(PopCount16 x) -> (FMOVDfpgp (VUADDLV (VCNT (FMOVDgpfp (ZeroExt16to64 x))))) +(PopCount64 x) => (FMOVDfpgp (VUADDLV (VCNT (FMOVDgpfp x)))) +(PopCount32 x) => (FMOVDfpgp (VUADDLV (VCNT (FMOVDgpfp (ZeroExt32to64 x))))) +(PopCount16 x) => (FMOVDfpgp (VUADDLV (VCNT (FMOVDgpfp (ZeroExt16to64 x))))) // Load args directly into the register class where it will be used. -(FMOVDgpfp (Arg [off] {sym})) -> @b.Func.Entry (Arg [off] {sym}) -(FMOVDfpgp (Arg [off] {sym})) -> @b.Func.Entry (Arg [off] {sym}) +(FMOVDgpfp (Arg [off] {sym})) => @b.Func.Entry (Arg [off] {sym}) +(FMOVDfpgp (Arg [off] {sym})) => @b.Func.Entry (Arg [off] {sym}) -// Similarly for stores, if we see a store after FPR <-> GPR move, then redirect store to use the other register set. 
-(MOVDstore [off] {sym} ptr (FMOVDfpgp val) mem) -> (FMOVDstore [off] {sym} ptr val mem) -(FMOVDstore [off] {sym} ptr (FMOVDgpfp val) mem) -> (MOVDstore [off] {sym} ptr val mem) -(MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem) -> (FMOVSstore [off] {sym} ptr val mem) -(FMOVSstore [off] {sym} ptr (FMOVSgpfp val) mem) -> (MOVWstore [off] {sym} ptr val mem) +// Similarly for stores, if we see a store after FPR <=> GPR move, then redirect store to use the other register set. +(MOVDstore [off] {sym} ptr (FMOVDfpgp val) mem) => (FMOVDstore [off] {sym} ptr val mem) +(FMOVDstore [off] {sym} ptr (FMOVDgpfp val) mem) => (MOVDstore [off] {sym} ptr val mem) +(MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem) => (FMOVSstore [off] {sym} ptr val mem) +(FMOVSstore [off] {sym} ptr (FMOVSgpfp val) mem) => (MOVWstore [off] {sym} ptr val mem) -// float <-> int register moves, with no conversion. +// float <=> int register moves, with no conversion. // These come up when compiling math.{Float64bits, Float64frombits, Float32bits, Float32frombits}. -(MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr val _)) -> (FMOVDfpgp val) -(FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr val _)) -> (FMOVDgpfp val) -(MOVWUload [off] {sym} ptr (FMOVSstore [off] {sym} ptr val _)) -> (FMOVSfpgp val) -(FMOVSload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _)) -> (FMOVSgpfp val) +(MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr val _)) => (FMOVDfpgp val) +(FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr val _)) => (FMOVDgpfp val) +(MOVWUload [off] {sym} ptr (FMOVSstore [off] {sym} ptr val _)) => (FMOVSfpgp val) +(FMOVSload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _)) => (FMOVSgpfp val) -(BitLen64 x) -> (SUB (MOVDconst [64]) (CLZ x)) -(BitLen32 x) -> (SUB (MOVDconst [32]) (CLZW x)) +(BitLen64 x) => (SUB (MOVDconst [64]) (CLZ x)) +(BitLen32 x) => (SUB (MOVDconst [32]) (CLZW x)) -(Bswap64 ...) -> (REV ...) -(Bswap32 ...) -> (REVW ...) +(Bswap64 ...) => (REV ...) +(Bswap32 ...) => (REVW ...) -(BitRev64 ...) -> (RBIT ...) -(BitRev32 ...) -> (RBITW ...) -(BitRev16 x) -> (SRLconst [48] (RBIT x)) -(BitRev8 x) -> (SRLconst [56] (RBIT x)) +(BitRev64 ...) => (RBIT ...) +(BitRev32 ...) => (RBITW ...) +(BitRev16 x) => (SRLconst [48] (RBIT x)) +(BitRev8 x) => (SRLconst [56] (RBIT x)) // In fact, UMOD will be translated into UREM instruction, and UREM is originally translated into // UDIV and MSUB instructions. But if there is already an identical UDIV instruction just before or // after UREM (case like quo, rem := z/y, z%y), then the second UDIV instruction becomes redundant. // The purpose of this rule is to have this extra UDIV instruction removed in CSE pass. -(UMOD x y) -> (MSUB x y (UDIV x y)) -(UMODW x y) -> (MSUBW x y (UDIVW x y)) +(UMOD x y) => (MSUB x y (UDIV x y)) +(UMODW x y) => (MSUBW x y (UDIVW x y)) // 64-bit addition with carry. -(Select0 (Add64carry x y c)) -> (Select0 (ADCSflags x y (Select1 (ADDSconstflags [-1] c)))) -(Select1 (Add64carry x y c)) -> (ADCzerocarry (Select1 (ADCSflags x y (Select1 (ADDSconstflags [-1] c))))) +(Select0 (Add64carry x y c)) => (Select0 (ADCSflags x y (Select1 (ADDSconstflags [-1] c)))) +(Select1 (Add64carry x y c)) => (ADCzerocarry (Select1 (ADCSflags x y (Select1 (ADDSconstflags [-1] c))))) // 64-bit subtraction with borrowing. 
-(Select0 (Sub64borrow x y bo)) -> (Select0 (SBCSflags x y (Select1 (NEGSflags bo)))) -(Select1 (Sub64borrow x y bo)) -> (NEG (NGCzerocarry (Select1 (SBCSflags x y (Select1 (NEGSflags bo)))))) +(Select0 (Sub64borrow x y bo)) => (Select0 (SBCSflags x y (Select1 (NEGSflags bo)))) +(Select1 (Sub64borrow x y bo)) => (NEG (NGCzerocarry (Select1 (SBCSflags x y (Select1 (NEGSflags bo)))))) // boolean ops -- booleans are represented with 0=false, 1=true -(AndB ...) -> (AND ...) -(OrB ...) -> (OR ...) -(EqB x y) -> (XOR (MOVDconst [1]) (XOR x y)) -(NeqB ...) -> (XOR ...) -(Not x) -> (XOR (MOVDconst [1]) x) +(AndB ...) => (AND ...) +(OrB ...) => (OR ...) +(EqB x y) => (XOR (MOVDconst [1]) (XOR x y)) +(NeqB ...) => (XOR ...) +(Not x) => (XOR (MOVDconst [1]) x) // shifts // hardware instruction uses only the low 6 bits of the shift @@ -193,126 +193,126 @@ (Rsh8x8 x y) -> (SRA (SignExt8to64 x) (CSEL {OpARM64LessThanU} (ZeroExt8to64 y) (Const64 [63]) (CMPconst [64] (ZeroExt8to64 y)))) // constants -(Const(64|32|16|8) ...) -> (MOVDconst ...) -(Const(32F|64F) ...) -> (FMOV(S|D)const ...) -(ConstNil) -> (MOVDconst [0]) -(ConstBool ...) -> (MOVDconst ...) +(Const(64|32|16|8) [val]) => (MOVDconst [int64(val)]) +(Const(32F|64F) [val]) => (FMOV(S|D)const [float64(val)]) +(ConstNil) => (MOVDconst [0]) +(ConstBool [b]) => (MOVDconst [b2i(b)]) -(Slicemask x) -> (SRAconst (NEG x) [63]) +(Slicemask x) => (SRAconst (NEG x) [63]) // truncations // Because we ignore high parts of registers, truncates are just copies. -(Trunc16to8 ...) -> (Copy ...) -(Trunc32to8 ...) -> (Copy ...) -(Trunc32to16 ...) -> (Copy ...) -(Trunc64to8 ...) -> (Copy ...) -(Trunc64to16 ...) -> (Copy ...) -(Trunc64to32 ...) -> (Copy ...) +(Trunc16to8 ...) => (Copy ...) +(Trunc32to8 ...) => (Copy ...) +(Trunc32to16 ...) => (Copy ...) +(Trunc64to8 ...) => (Copy ...) +(Trunc64to16 ...) => (Copy ...) +(Trunc64to32 ...) => (Copy ...) // Zero-/Sign-extensions -(ZeroExt8to16 ...) -> (MOVBUreg ...) -(ZeroExt8to32 ...) -> (MOVBUreg ...) -(ZeroExt16to32 ...) -> (MOVHUreg ...) -(ZeroExt8to64 ...) -> (MOVBUreg ...) -(ZeroExt16to64 ...) -> (MOVHUreg ...) -(ZeroExt32to64 ...) -> (MOVWUreg ...) - -(SignExt8to16 ...) -> (MOVBreg ...) -(SignExt8to32 ...) -> (MOVBreg ...) -(SignExt16to32 ...) -> (MOVHreg ...) -(SignExt8to64 ...) -> (MOVBreg ...) -(SignExt16to64 ...) -> (MOVHreg ...) -(SignExt32to64 ...) -> (MOVWreg ...) +(ZeroExt8to16 ...) => (MOVBUreg ...) +(ZeroExt8to32 ...) => (MOVBUreg ...) +(ZeroExt16to32 ...) => (MOVHUreg ...) +(ZeroExt8to64 ...) => (MOVBUreg ...) +(ZeroExt16to64 ...) => (MOVHUreg ...) +(ZeroExt32to64 ...) => (MOVWUreg ...) + +(SignExt8to16 ...) => (MOVBreg ...) +(SignExt8to32 ...) => (MOVBreg ...) +(SignExt16to32 ...) => (MOVHreg ...) +(SignExt8to64 ...) => (MOVBreg ...) +(SignExt16to64 ...) => (MOVHreg ...) +(SignExt32to64 ...) => (MOVWreg ...) // float <-> int conversion -(Cvt32to32F ...) -> (SCVTFWS ...) -(Cvt32to64F ...) -> (SCVTFWD ...) -(Cvt64to32F ...) -> (SCVTFS ...) -(Cvt64to64F ...) -> (SCVTFD ...) -(Cvt32Uto32F ...) -> (UCVTFWS ...) -(Cvt32Uto64F ...) -> (UCVTFWD ...) -(Cvt64Uto32F ...) -> (UCVTFS ...) -(Cvt64Uto64F ...) -> (UCVTFD ...) -(Cvt32Fto32 ...) -> (FCVTZSSW ...) -(Cvt64Fto32 ...) -> (FCVTZSDW ...) -(Cvt32Fto64 ...) -> (FCVTZSS ...) -(Cvt64Fto64 ...) -> (FCVTZSD ...) -(Cvt32Fto32U ...) -> (FCVTZUSW ...) -(Cvt64Fto32U ...) -> (FCVTZUDW ...) -(Cvt32Fto64U ...) -> (FCVTZUS ...) -(Cvt64Fto64U ...) -> (FCVTZUD ...) -(Cvt32Fto64F ...) -> (FCVTSD ...) -(Cvt64Fto32F ...) -> (FCVTDS ...) - -(CvtBoolToUint8 ...) -> (Copy ...) 
- -(Round32F ...) -> (LoweredRound32F ...) -(Round64F ...) -> (LoweredRound64F ...) +(Cvt32to32F ...) => (SCVTFWS ...) +(Cvt32to64F ...) => (SCVTFWD ...) +(Cvt64to32F ...) => (SCVTFS ...) +(Cvt64to64F ...) => (SCVTFD ...) +(Cvt32Uto32F ...) => (UCVTFWS ...) +(Cvt32Uto64F ...) => (UCVTFWD ...) +(Cvt64Uto32F ...) => (UCVTFS ...) +(Cvt64Uto64F ...) => (UCVTFD ...) +(Cvt32Fto32 ...) => (FCVTZSSW ...) +(Cvt64Fto32 ...) => (FCVTZSDW ...) +(Cvt32Fto64 ...) => (FCVTZSS ...) +(Cvt64Fto64 ...) => (FCVTZSD ...) +(Cvt32Fto32U ...) => (FCVTZUSW ...) +(Cvt64Fto32U ...) => (FCVTZUDW ...) +(Cvt32Fto64U ...) => (FCVTZUS ...) +(Cvt64Fto64U ...) => (FCVTZUD ...) +(Cvt32Fto64F ...) => (FCVTSD ...) +(Cvt64Fto32F ...) => (FCVTDS ...) + +(CvtBoolToUint8 ...) => (Copy ...) + +(Round32F ...) => (LoweredRound32F ...) +(Round64F ...) => (LoweredRound64F ...) // comparisons -(Eq8 x y) -> (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) -(Eq16 x y) -> (Equal (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) -(Eq32 x y) -> (Equal (CMPW x y)) -(Eq64 x y) -> (Equal (CMP x y)) -(EqPtr x y) -> (Equal (CMP x y)) -(Eq32F x y) -> (Equal (FCMPS x y)) -(Eq64F x y) -> (Equal (FCMPD x y)) - -(Neq8 x y) -> (NotEqual (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) -(Neq16 x y) -> (NotEqual (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) -(Neq32 x y) -> (NotEqual (CMPW x y)) -(Neq64 x y) -> (NotEqual (CMP x y)) -(NeqPtr x y) -> (NotEqual (CMP x y)) -(Neq32F x y) -> (NotEqual (FCMPS x y)) -(Neq64F x y) -> (NotEqual (FCMPD x y)) - -(Less8 x y) -> (LessThan (CMPW (SignExt8to32 x) (SignExt8to32 y))) -(Less16 x y) -> (LessThan (CMPW (SignExt16to32 x) (SignExt16to32 y))) -(Less32 x y) -> (LessThan (CMPW x y)) -(Less64 x y) -> (LessThan (CMP x y)) +(Eq8 x y) => (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) +(Eq16 x y) => (Equal (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) +(Eq32 x y) => (Equal (CMPW x y)) +(Eq64 x y) => (Equal (CMP x y)) +(EqPtr x y) => (Equal (CMP x y)) +(Eq32F x y) => (Equal (FCMPS x y)) +(Eq64F x y) => (Equal (FCMPD x y)) + +(Neq8 x y) => (NotEqual (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) +(Neq16 x y) => (NotEqual (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) +(Neq32 x y) => (NotEqual (CMPW x y)) +(Neq64 x y) => (NotEqual (CMP x y)) +(NeqPtr x y) => (NotEqual (CMP x y)) +(Neq32F x y) => (NotEqual (FCMPS x y)) +(Neq64F x y) => (NotEqual (FCMPD x y)) + +(Less8 x y) => (LessThan (CMPW (SignExt8to32 x) (SignExt8to32 y))) +(Less16 x y) => (LessThan (CMPW (SignExt16to32 x) (SignExt16to32 y))) +(Less32 x y) => (LessThan (CMPW x y)) +(Less64 x y) => (LessThan (CMP x y)) // Set condition flags for floating-point comparisons "x < y" // and "x <= y". Because if either or both of the operands are // NaNs, all three of (x < y), (x == y) and (x > y) are false, // and ARM Manual says FCMP instruction sets PSTATE. // of this case to (0, 0, 1, 1). 
-(Less32F x y) -> (LessThanF (FCMPS x y)) -(Less64F x y) -> (LessThanF (FCMPD x y)) +(Less32F x y) => (LessThanF (FCMPS x y)) +(Less64F x y) => (LessThanF (FCMPD x y)) -(Less8U x y) -> (LessThanU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) -(Less16U x y) -> (LessThanU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) -(Less32U x y) -> (LessThanU (CMPW x y)) -(Less64U x y) -> (LessThanU (CMP x y)) +(Less8U x y) => (LessThanU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) +(Less16U x y) => (LessThanU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) +(Less32U x y) => (LessThanU (CMPW x y)) +(Less64U x y) => (LessThanU (CMP x y)) -(Leq8 x y) -> (LessEqual (CMPW (SignExt8to32 x) (SignExt8to32 y))) -(Leq16 x y) -> (LessEqual (CMPW (SignExt16to32 x) (SignExt16to32 y))) -(Leq32 x y) -> (LessEqual (CMPW x y)) -(Leq64 x y) -> (LessEqual (CMP x y)) +(Leq8 x y) => (LessEqual (CMPW (SignExt8to32 x) (SignExt8to32 y))) +(Leq16 x y) => (LessEqual (CMPW (SignExt16to32 x) (SignExt16to32 y))) +(Leq32 x y) => (LessEqual (CMPW x y)) +(Leq64 x y) => (LessEqual (CMP x y)) // Refer to the comments for op Less64F above. -(Leq32F x y) -> (LessEqualF (FCMPS x y)) -(Leq64F x y) -> (LessEqualF (FCMPD x y)) +(Leq32F x y) => (LessEqualF (FCMPS x y)) +(Leq64F x y) => (LessEqualF (FCMPD x y)) -(Leq8U x y) -> (LessEqualU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) -(Leq16U x y) -> (LessEqualU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) -(Leq32U x y) -> (LessEqualU (CMPW x y)) -(Leq64U x y) -> (LessEqualU (CMP x y)) +(Leq8U x y) => (LessEqualU (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y))) +(Leq16U x y) => (LessEqualU (CMPW (ZeroExt16to32 x) (ZeroExt16to32 y))) +(Leq32U x y) => (LessEqualU (CMPW x y)) +(Leq64U x y) => (LessEqualU (CMP x y)) // Optimize comparison between a floating-point value and 0.0 with "FCMP $(0.0), Fn" -(FCMPS x (FMOVSconst [0])) -> (FCMPS0 x) -(FCMPS (FMOVSconst [0]) x) -> (InvertFlags (FCMPS0 x)) -(FCMPD x (FMOVDconst [0])) -> (FCMPD0 x) -(FCMPD (FMOVDconst [0]) x) -> (InvertFlags (FCMPD0 x)) +(FCMPS x (FMOVSconst [0])) => (FCMPS0 x) +(FCMPS (FMOVSconst [0]) x) => (InvertFlags (FCMPS0 x)) +(FCMPD x (FMOVDconst [0])) => (FCMPD0 x) +(FCMPD (FMOVDconst [0]) x) => (InvertFlags (FCMPD0 x)) // CSEL needs a flag-generating argument. Synthesize a CMPW if necessary. (CondSelect x y boolval) && flagArg(boolval) != nil -> (CSEL {boolval.Op} x y flagArg(boolval)) (CondSelect x y boolval) && flagArg(boolval) == nil -> (CSEL {OpARM64NotEqual} x y (CMPWconst [0] boolval)) -(OffPtr [off] ptr:(SP)) -> (MOVDaddr [off] ptr) -(OffPtr [off] ptr) -> (ADDconst [off] ptr) +(OffPtr [off] ptr:(SP)) && is32Bit(off) => (MOVDaddr [int32(off)] ptr) +(OffPtr [off] ptr) => (ADDconst [off] ptr) -(Addr ...) -> (MOVDaddr ...) 
-(LocalAddr {sym} base _) -> (MOVDaddr {sym} base) +(Addr {sym} base) => (MOVDaddr {sym} base) +(LocalAddr {sym} base _) => (MOVDaddr {sym} base) // loads (Load ptr mem) && t.IsBoolean() -> (MOVBUload ptr mem) diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index fd42ec8e21..2c18a70581 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -396,8 +396,7 @@ func rewriteValueARM64(v *Value) bool { v.Op = OpARM64ADD return true case OpAddr: - v.Op = OpARM64MOVDaddr - return true + return rewriteValueARM64_OpAddr(v) case OpAnd16: v.Op = OpARM64AND return true @@ -508,26 +507,19 @@ func rewriteValueARM64(v *Value) bool { case OpCondSelect: return rewriteValueARM64_OpCondSelect(v) case OpConst16: - v.Op = OpARM64MOVDconst - return true + return rewriteValueARM64_OpConst16(v) case OpConst32: - v.Op = OpARM64MOVDconst - return true + return rewriteValueARM64_OpConst32(v) case OpConst32F: - v.Op = OpARM64FMOVSconst - return true + return rewriteValueARM64_OpConst32F(v) case OpConst64: - v.Op = OpARM64MOVDconst - return true + return rewriteValueARM64_OpConst64(v) case OpConst64F: - v.Op = OpARM64FMOVDconst - return true + return rewriteValueARM64_OpConst64F(v) case OpConst8: - v.Op = OpARM64MOVDconst - return true + return rewriteValueARM64_OpConst8(v) case OpConstBool: - v.Op = OpARM64MOVDconst - return true + return rewriteValueARM64_OpConstBool(v) case OpConstNil: return rewriteValueARM64_OpConstNil(v) case OpCtz16: @@ -612,8 +604,7 @@ func rewriteValueARM64(v *Value) bool { case OpDiv16u: return rewriteValueARM64_OpDiv16u(v) case OpDiv32: - v.Op = OpARM64DIVW - return true + return rewriteValueARM64_OpDiv32(v) case OpDiv32F: v.Op = OpARM64FDIVS return true @@ -621,8 +612,7 @@ func rewriteValueARM64(v *Value) bool { v.Op = OpARM64UDIVW return true case OpDiv64: - v.Op = OpARM64DIV - return true + return rewriteValueARM64_OpDiv64(v) case OpDiv64F: v.Op = OpARM64FDIVD return true @@ -763,14 +753,12 @@ func rewriteValueARM64(v *Value) bool { case OpMod16u: return rewriteValueARM64_OpMod16u(v) case OpMod32: - v.Op = OpARM64MODW - return true + return rewriteValueARM64_OpMod32(v) case OpMod32u: v.Op = OpARM64UMODW return true case OpMod64: - v.Op = OpARM64MOD - return true + return rewriteValueARM64_OpMod64(v) case OpMod64u: v.Op = OpARM64UMOD return true @@ -4002,7 +3990,7 @@ func rewriteValueARM64_OpARM64FCMPD(v *Value) bool { // result: (FCMPD0 x) for { x := v_0 - if v_1.Op != OpARM64FMOVDconst || v_1.AuxInt != 0 { + if v_1.Op != OpARM64FMOVDconst || auxIntToFloat64(v_1.AuxInt) != 0 { break } v.reset(OpARM64FCMPD0) @@ -4012,7 +4000,7 @@ func rewriteValueARM64_OpARM64FCMPD(v *Value) bool { // match: (FCMPD (FMOVDconst [0]) x) // result: (InvertFlags (FCMPD0 x)) for { - if v_0.Op != OpARM64FMOVDconst || v_0.AuxInt != 0 { + if v_0.Op != OpARM64FMOVDconst || auxIntToFloat64(v_0.AuxInt) != 0 { break } x := v_1 @@ -4032,7 +4020,7 @@ func rewriteValueARM64_OpARM64FCMPS(v *Value) bool { // result: (FCMPS0 x) for { x := v_0 - if v_1.Op != OpARM64FMOVSconst || v_1.AuxInt != 0 { + if v_1.Op != OpARM64FMOVSconst || auxIntToFloat64(v_1.AuxInt) != 0 { break } v.reset(OpARM64FCMPS0) @@ -4042,7 +4030,7 @@ func rewriteValueARM64_OpARM64FCMPS(v *Value) bool { // match: (FCMPS (FMOVSconst [0]) x) // result: (InvertFlags (FCMPS0 x)) for { - if v_0.Op != OpARM64FMOVSconst || v_0.AuxInt != 0 { + if v_0.Op != OpARM64FMOVSconst || auxIntToFloat64(v_0.AuxInt) != 0 { break } x := v_1 @@ -4064,13 +4052,13 @@ 
func rewriteValueARM64_OpARM64FMOVDfpgp(v *Value) bool { if v_0.Op != OpArg { break } - off := v_0.AuxInt - sym := v_0.Aux + off := auxIntToInt32(v_0.AuxInt) + sym := auxToSym(v_0.Aux) b = b.Func.Entry v0 := b.NewValue0(v.Pos, OpArg, t) v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) return true } return false @@ -4085,13 +4073,13 @@ func rewriteValueARM64_OpARM64FMOVDgpfp(v *Value) bool { if v_0.Op != OpArg { break } - off := v_0.AuxInt - sym := v_0.Aux + off := auxIntToInt32(v_0.AuxInt) + sym := auxToSym(v_0.Aux) b = b.Func.Entry v0 := b.NewValue0(v.Pos, OpArg, t) v.copyOf(v0) - v0.AuxInt = off - v0.Aux = sym + v0.AuxInt = int32ToAuxInt(off) + v0.Aux = symToAux(sym) return true } return false @@ -4104,10 +4092,10 @@ func rewriteValueARM64_OpARM64FMOVDload(v *Value) bool { // match: (FMOVDload [off] {sym} ptr (MOVDstore [off] {sym} ptr val _)) // result: (FMOVDgpfp val) for { - off := v.AuxInt - sym := v.Aux + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpARM64MOVDstore || v_1.AuxInt != off || v_1.Aux != sym { + if v_1.Op != OpARM64MOVDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { break } val := v_1.Args[1] @@ -4225,8 +4213,8 @@ func rewriteValueARM64_OpARM64FMOVDstore(v *Value) bool { // match: (FMOVDstore [off] {sym} ptr (FMOVDgpfp val) mem) // result: (MOVDstore [off] {sym} ptr val mem) for { - off := v.AuxInt - sym := v.Aux + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) ptr := v_0 if v_1.Op != OpARM64FMOVDgpfp { break @@ -4234,8 +4222,8 @@ func rewriteValueARM64_OpARM64FMOVDstore(v *Value) bool { val := v_1.Args[0] mem := v_2 v.reset(OpARM64MOVDstore) - v.AuxInt = off - v.Aux = sym + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) v.AddArg3(ptr, val, mem) return true } @@ -4351,10 +4339,10 @@ func rewriteValueARM64_OpARM64FMOVSload(v *Value) bool { // match: (FMOVSload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _)) // result: (FMOVSgpfp val) for { - off := v.AuxInt - sym := v.Aux + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpARM64MOVWstore || v_1.AuxInt != off || v_1.Aux != sym { + if v_1.Op != OpARM64MOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { break } val := v_1.Args[1] @@ -4472,8 +4460,8 @@ func rewriteValueARM64_OpARM64FMOVSstore(v *Value) bool { // match: (FMOVSstore [off] {sym} ptr (FMOVSgpfp val) mem) // result: (MOVWstore [off] {sym} ptr val mem) for { - off := v.AuxInt - sym := v.Aux + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) ptr := v_0 if v_1.Op != OpARM64FMOVSgpfp { break @@ -4481,8 +4469,8 @@ func rewriteValueARM64_OpARM64FMOVSstore(v *Value) bool { val := v_1.Args[0] mem := v_2 v.reset(OpARM64MOVWstore) - v.AuxInt = off - v.Aux = sym + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) v.AddArg3(ptr, val, mem) return true } @@ -9253,10 +9241,10 @@ func rewriteValueARM64_OpARM64MOVDload(v *Value) bool { // match: (MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr val _)) // result: (FMOVDfpgp val) for { - off := v.AuxInt - sym := v.Aux + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpARM64FMOVDstore || v_1.AuxInt != off || v_1.Aux != sym { + if v_1.Op != OpARM64FMOVDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { break } val := v_1.Args[1] @@ -9535,8 +9523,8 @@ func rewriteValueARM64_OpARM64MOVDstore(v *Value) bool { // match: (MOVDstore [off] {sym} ptr (FMOVDfpgp val) mem) // result: 
(FMOVDstore [off] {sym} ptr val mem) for { - off := v.AuxInt - sym := v.Aux + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) ptr := v_0 if v_1.Op != OpARM64FMOVDfpgp { break @@ -9544,8 +9532,8 @@ func rewriteValueARM64_OpARM64MOVDstore(v *Value) bool { val := v_1.Args[0] mem := v_2 v.reset(OpARM64FMOVDstore) - v.AuxInt = off - v.Aux = sym + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) v.AddArg3(ptr, val, mem) return true } @@ -12040,10 +12028,10 @@ func rewriteValueARM64_OpARM64MOVWUload(v *Value) bool { // match: (MOVWUload [off] {sym} ptr (FMOVSstore [off] {sym} ptr val _)) // result: (FMOVSfpgp val) for { - off := v.AuxInt - sym := v.Aux + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) ptr := v_0 - if v_1.Op != OpARM64FMOVSstore || v_1.AuxInt != off || v_1.Aux != sym { + if v_1.Op != OpARM64FMOVSstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { break } val := v_1.Args[1] @@ -12921,8 +12909,8 @@ func rewriteValueARM64_OpARM64MOVWstore(v *Value) bool { // match: (MOVWstore [off] {sym} ptr (FMOVSfpgp val) mem) // result: (FMOVSstore [off] {sym} ptr val mem) for { - off := v.AuxInt - sym := v.Aux + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) ptr := v_0 if v_1.Op != OpARM64FMOVSfpgp { break @@ -12930,8 +12918,8 @@ func rewriteValueARM64_OpARM64MOVWstore(v *Value) bool { val := v_1.Args[0] mem := v_2 v.reset(OpARM64FMOVSstore) - v.AuxInt = off - v.Aux = sym + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) v.AddArg3(ptr, val, mem) return true } @@ -21828,6 +21816,19 @@ func rewriteValueARM64_OpARM64XORshiftRL(v *Value) bool { } return false } +func rewriteValueARM64_OpAddr(v *Value) bool { + v_0 := v.Args[0] + // match: (Addr {sym} base) + // result: (MOVDaddr {sym} base) + for { + sym := auxToSym(v.Aux) + base := v_0 + v.reset(OpARM64MOVDaddr) + v.Aux = symToAux(sym) + v.AddArg(base) + return true + } +} func rewriteValueARM64_OpAtomicAnd8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -21878,7 +21879,7 @@ func rewriteValueARM64_OpAvg64u(v *Value) bool { y := v_1 v.reset(OpARM64ADD) v0 := b.NewValue0(v.Pos, OpARM64SRLconst, t) - v0.AuxInt = 1 + v0.AuxInt = int64ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpARM64SUB, t) v1.AddArg2(x, y) v0.AddArg(v1) @@ -21896,7 +21897,7 @@ func rewriteValueARM64_OpBitLen32(v *Value) bool { x := v_0 v.reset(OpARM64SUB) v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64) - v0.AuxInt = 32 + v0.AuxInt = int64ToAuxInt(32) v1 := b.NewValue0(v.Pos, OpARM64CLZW, typ.Int) v1.AddArg(x) v.AddArg2(v0, v1) @@ -21913,7 +21914,7 @@ func rewriteValueARM64_OpBitLen64(v *Value) bool { x := v_0 v.reset(OpARM64SUB) v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64) - v0.AuxInt = 64 + v0.AuxInt = int64ToAuxInt(64) v1 := b.NewValue0(v.Pos, OpARM64CLZ, typ.Int) v1.AddArg(x) v.AddArg2(v0, v1) @@ -21929,7 +21930,7 @@ func rewriteValueARM64_OpBitRev16(v *Value) bool { for { x := v_0 v.reset(OpARM64SRLconst) - v.AuxInt = 48 + v.AuxInt = int64ToAuxInt(48) v0 := b.NewValue0(v.Pos, OpARM64RBIT, typ.UInt64) v0.AddArg(x) v.AddArg(v0) @@ -21945,7 +21946,7 @@ func rewriteValueARM64_OpBitRev8(v *Value) bool { for { x := v_0 v.reset(OpARM64SRLconst) - v.AuxInt = 56 + v.AuxInt = int64ToAuxInt(56) v0 := b.NewValue0(v.Pos, OpARM64RBIT, typ.UInt64) v0.AddArg(x) v.AddArg(v0) @@ -21992,12 +21993,82 @@ func rewriteValueARM64_OpCondSelect(v *Value) bool { } return false } +func rewriteValueARM64_OpConst16(v *Value) bool { + // match: (Const16 [val]) + // result: (MOVDconst [int64(val)]) + for { + val := auxIntToInt16(v.AuxInt) + 
v.reset(OpARM64MOVDconst) + v.AuxInt = int64ToAuxInt(int64(val)) + return true + } +} +func rewriteValueARM64_OpConst32(v *Value) bool { + // match: (Const32 [val]) + // result: (MOVDconst [int64(val)]) + for { + val := auxIntToInt32(v.AuxInt) + v.reset(OpARM64MOVDconst) + v.AuxInt = int64ToAuxInt(int64(val)) + return true + } +} +func rewriteValueARM64_OpConst32F(v *Value) bool { + // match: (Const32F [val]) + // result: (FMOVSconst [float64(val)]) + for { + val := auxIntToFloat32(v.AuxInt) + v.reset(OpARM64FMOVSconst) + v.AuxInt = float64ToAuxInt(float64(val)) + return true + } +} +func rewriteValueARM64_OpConst64(v *Value) bool { + // match: (Const64 [val]) + // result: (MOVDconst [int64(val)]) + for { + val := auxIntToInt64(v.AuxInt) + v.reset(OpARM64MOVDconst) + v.AuxInt = int64ToAuxInt(int64(val)) + return true + } +} +func rewriteValueARM64_OpConst64F(v *Value) bool { + // match: (Const64F [val]) + // result: (FMOVDconst [float64(val)]) + for { + val := auxIntToFloat64(v.AuxInt) + v.reset(OpARM64FMOVDconst) + v.AuxInt = float64ToAuxInt(float64(val)) + return true + } +} +func rewriteValueARM64_OpConst8(v *Value) bool { + // match: (Const8 [val]) + // result: (MOVDconst [int64(val)]) + for { + val := auxIntToInt8(v.AuxInt) + v.reset(OpARM64MOVDconst) + v.AuxInt = int64ToAuxInt(int64(val)) + return true + } +} +func rewriteValueARM64_OpConstBool(v *Value) bool { + // match: (ConstBool [b]) + // result: (MOVDconst [b2i(b)]) + for { + b := auxIntToBool(v.AuxInt) + v.reset(OpARM64MOVDconst) + v.AuxInt = int64ToAuxInt(b2i(b)) + return true + } +} func rewriteValueARM64_OpConstNil(v *Value) bool { // match: (ConstNil) // result: (MOVDconst [0]) for { v.reset(OpARM64MOVDconst) - v.AuxInt = 0 + v.AuxInt = int64ToAuxInt(0) return true } } @@ -22014,7 +22085,7 @@ func rewriteValueARM64_OpCtz16(v *Value) bool { v.Type = t v0 := b.NewValue0(v.Pos, OpARM64RBITW, typ.UInt32) v1 := b.NewValue0(v.Pos, OpARM64ORconst, typ.UInt32) - v1.AuxInt = 0x10000 + v1.AuxInt = int64ToAuxInt(0x10000) v1.AddArg(x) v0.AddArg(v1) v.AddArg(v0) @@ -22064,7 +22135,7 @@ func rewriteValueARM64_OpCtz8(v *Value) bool { v.Type = t v0 := b.NewValue0(v.Pos, OpARM64RBITW, typ.UInt32) v1 := b.NewValue0(v.Pos, OpARM64ORconst, typ.UInt32) - v1.AuxInt = 0x100 + v1.AuxInt = int64ToAuxInt(0x100) v1.AddArg(x) v0.AddArg(v1) v.AddArg(v0) @@ -22076,9 +22147,12 @@ func rewriteValueARM64_OpDiv16(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Div16 x y) + // match: (Div16 [false] x y) // result: (DIVW (SignExt16to32 x) (SignExt16to32 y)) for { + if auxIntToBool(v.AuxInt) != false { + break + } x := v_0 y := v_1 v.reset(OpARM64DIVW) @@ -22089,6 +22163,7 @@ func rewriteValueARM64_OpDiv16(v *Value) bool { v.AddArg2(v0, v1) return true } + return false } func rewriteValueARM64_OpDiv16u(v *Value) bool { v_1 := v.Args[1] @@ -22109,6 +22184,40 @@ func rewriteValueARM64_OpDiv16u(v *Value) bool { return true } } +func rewriteValueARM64_OpDiv32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (Div32 [false] x y) + // result: (DIVW x y) + for { + if auxIntToBool(v.AuxInt) != false { + break + } + x := v_0 + y := v_1 + v.reset(OpARM64DIVW) + v.AddArg2(x, y) + return true + } + return false +} +func rewriteValueARM64_OpDiv64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (Div64 [false] x y) + // result: (DIV x y) + for { + if auxIntToBool(v.AuxInt) != false { + break + } + x := v_0 + y := v_1 + v.reset(OpARM64DIV) + v.AddArg2(x, y) + return true + } + return false +} func 
rewriteValueARM64_OpDiv8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -22265,7 +22374,7 @@ func rewriteValueARM64_OpEqB(v *Value) bool { y := v_1 v.reset(OpARM64XOR) v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64) - v0.AuxInt = 1 + v0.AuxInt = int64ToAuxInt(1) v1 := b.NewValue0(v.Pos, OpARM64XOR, typ.Bool) v1.AddArg2(x, y) v.AddArg2(v0, v1) @@ -22314,7 +22423,7 @@ func rewriteValueARM64_OpHmul32(v *Value) bool { x := v_0 y := v_1 v.reset(OpARM64SRAconst) - v.AuxInt = 32 + v.AuxInt = int64ToAuxInt(32) v0 := b.NewValue0(v.Pos, OpARM64MULL, typ.Int64) v0.AddArg2(x, y) v.AddArg(v0) @@ -22332,7 +22441,7 @@ func rewriteValueARM64_OpHmul32u(v *Value) bool { x := v_0 y := v_1 v.reset(OpARM64SRAconst) - v.AuxInt = 32 + v.AuxInt = int64ToAuxInt(32) v0 := b.NewValue0(v.Pos, OpARM64UMULL, typ.UInt64) v0.AddArg2(x, y) v.AddArg(v0) @@ -22896,10 +23005,10 @@ func rewriteValueARM64_OpLocalAddr(v *Value) bool { // match: (LocalAddr {sym} base _) // result: (MOVDaddr {sym} base) for { - sym := v.Aux + sym := auxToSym(v.Aux) base := v_0 v.reset(OpARM64MOVDaddr) - v.Aux = sym + v.Aux = symToAux(sym) v.AddArg(base) return true } @@ -23346,6 +23455,32 @@ func rewriteValueARM64_OpMod16u(v *Value) bool { return true } } +func rewriteValueARM64_OpMod32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (Mod32 x y) + // result: (MODW x y) + for { + x := v_0 + y := v_1 + v.reset(OpARM64MODW) + v.AddArg2(x, y) + return true + } +} +func rewriteValueARM64_OpMod64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (Mod64 x y) + // result: (MOD x y) + for { + x := v_0 + y := v_1 + v.reset(OpARM64MOD) + v.AddArg2(x, y) + return true + } +} func rewriteValueARM64_OpMod8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -23835,7 +23970,7 @@ func rewriteValueARM64_OpNot(v *Value) bool { x := v_0 v.reset(OpARM64XOR) v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64) - v0.AuxInt = 1 + v0.AuxInt = int64ToAuxInt(1) v.AddArg2(v0, x) return true } @@ -23843,25 +23978,26 @@ func rewriteValueARM64_OpNot(v *Value) bool { func rewriteValueARM64_OpOffPtr(v *Value) bool { v_0 := v.Args[0] // match: (OffPtr [off] ptr:(SP)) - // result: (MOVDaddr [off] ptr) + // cond: is32Bit(off) + // result: (MOVDaddr [int32(off)] ptr) for { - off := v.AuxInt + off := auxIntToInt64(v.AuxInt) ptr := v_0 - if ptr.Op != OpSP { + if ptr.Op != OpSP || !(is32Bit(off)) { break } v.reset(OpARM64MOVDaddr) - v.AuxInt = off + v.AuxInt = int32ToAuxInt(int32(off)) v.AddArg(ptr) return true } // match: (OffPtr [off] ptr) // result: (ADDconst [off] ptr) for { - off := v.AuxInt + off := auxIntToInt64(v.AuxInt) ptr := v_0 v.reset(OpARM64ADDconst) - v.AuxInt = off + v.AuxInt = int64ToAuxInt(off) v.AddArg(ptr) return true } @@ -24000,15 +24136,15 @@ func rewriteValueARM64_OpRotateLeft16(v *Value) bool { if v_1.Op != OpARM64MOVDconst { break } - c := v_1.AuxInt + c := auxIntToInt64(v_1.AuxInt) v.reset(OpOr16) v0 := b.NewValue0(v.Pos, OpLsh16x64, t) v1 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64) - v1.AuxInt = c & 15 + v1.AuxInt = int64ToAuxInt(c & 15) v0.AddArg2(x, v1) v2 := b.NewValue0(v.Pos, OpRsh16Ux64, t) v3 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64) - v3.AuxInt = -c & 15 + v3.AuxInt = int64ToAuxInt(-c & 15) v2.AddArg2(x, v3) v.AddArg2(v0, v2) return true @@ -24060,15 +24196,15 @@ func rewriteValueARM64_OpRotateLeft8(v *Value) bool { if v_1.Op != OpARM64MOVDconst { break } - c := v_1.AuxInt + c := auxIntToInt64(v_1.AuxInt) v.reset(OpOr8) v0 := b.NewValue0(v.Pos, OpLsh8x64, t) v1 := 
b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64) - v1.AuxInt = c & 7 + v1.AuxInt = int64ToAuxInt(c & 7) v0.AddArg2(x, v1) v2 := b.NewValue0(v.Pos, OpRsh8Ux64, t) v3 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64) - v3.AuxInt = -c & 7 + v3.AuxInt = int64ToAuxInt(-c & 7) v2.AddArg2(x, v3) v.AddArg2(v0, v2) return true @@ -24939,7 +25075,7 @@ func rewriteValueARM64_OpSelect0(v *Value) bool { v0 := b.NewValue0(v.Pos, OpARM64ADCSflags, types.NewTuple(typ.UInt64, types.TypeFlags)) v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) v2 := b.NewValue0(v.Pos, OpARM64ADDSconstflags, types.NewTuple(typ.UInt64, types.TypeFlags)) - v2.AuxInt = -1 + v2.AuxInt = int64ToAuxInt(-1) v2.AddArg(c) v1.AddArg(v2) v0.AddArg3(x, y, v1) @@ -24987,7 +25123,7 @@ func rewriteValueARM64_OpSelect1(v *Value) bool { v1 := b.NewValue0(v.Pos, OpARM64ADCSflags, types.NewTuple(typ.UInt64, types.TypeFlags)) v2 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) v3 := b.NewValue0(v.Pos, OpARM64ADDSconstflags, types.NewTuple(typ.UInt64, types.TypeFlags)) - v3.AuxInt = -1 + v3.AuxInt = int64ToAuxInt(-1) v3.AddArg(c) v2.AddArg(v3) v1.AddArg3(x, y, v2) @@ -25030,7 +25166,7 @@ func rewriteValueARM64_OpSlicemask(v *Value) bool { t := v.Type x := v_0 v.reset(OpARM64SRAconst) - v.AuxInt = 63 + v.AuxInt = int64ToAuxInt(63) v0 := b.NewValue0(v.Pos, OpARM64NEG, t) v0.AddArg(x) v.AddArg(v0) -- 2.50.0
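Note on the mechanical change above: switching a rule from "->" to "=>" in ARM64.rules tells the rule generator to emit typed access to Value.AuxInt and Value.Aux, so the generated rewriteARM64.go code goes through conversion helpers (auxIntToInt32/int32ToAuxInt, auxIntToInt64/int64ToAuxInt, auxToSym/symToAux, auxIntToBool, b2i, ...) instead of assigning raw int64/interface values, and rules such as (Div64 [false] x y) now match the aux field explicitly. The sketch below is a minimal, self-contained illustration of that helper pattern under simplified assumptions; the value struct and helper bodies are illustrative stand-ins, not the compiler's actual definitions.

```go
// Toy illustration of the typed-aux idea used by "=>" rules: AuxInt is
// still a single int64 slot on the value, but all reads and writes go
// through small typed conversion helpers rather than raw assignments.
package main

import (
	"fmt"
	"math"
)

// value is a stand-in for ssa.Value; the real type carries much more.
type value struct {
	Op     string
	AuxInt int64 // untyped storage slot shared by all ops
}

// Typed conversion helpers, mirroring the naming style seen in the
// generated code above (simplified, assumed bodies).
func int32ToAuxInt(i int32) int64 { return int64(i) }
func auxIntToInt32(i int64) int32 { return int32(i) }

func boolToAuxInt(b bool) int64 { return b2i(b) }
func auxIntToBool(i int64) bool { return i != 0 }

// Floats are packed into the int64 slot via their bit pattern.
func float64ToAuxInt(f float64) int64 { return int64(math.Float64bits(f)) }
func auxIntToFloat64(i int64) float64 { return math.Float64frombits(uint64(i)) }

// b2i is the helper referenced by the (ConstBool [b]) => (MOVDconst [b2i(b)]) rule.
func b2i(b bool) int64 {
	if b {
		return 1
	}
	return 0
}

func main() {
	// Old untyped style ("->" rules): raw int64 assignment.
	v := &value{Op: "MOVDaddr"}
	v.AuxInt = 8

	// New typed style ("=>" rules): the offset is conceptually an int32,
	// so it round-trips through the typed helpers.
	off := auxIntToInt32(v.AuxInt)
	v.AuxInt = int32ToAuxInt(off + 4)
	fmt.Println(v.Op, auxIntToInt32(v.AuxInt)) // MOVDaddr 12

	// Booleans and floats share the same int64 slot.
	c := &value{Op: "MOVDconst", AuxInt: boolToAuxInt(true)}
	fmt.Println(auxIntToBool(c.AuxInt)) // true

	f := &value{Op: "FMOVDconst", AuxInt: float64ToAuxInt(1.5)}
	fmt.Println(auxIntToFloat64(f.AuxInt)) // 1.5
}
```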