p.From.Reg = x
p.To.Type = obj.TYPE_REG
p.To.Reg = y
- case ssa.OpLOONG64MOVVnop:
+ case ssa.OpLOONG64MOVVnop,
+ ssa.OpLOONG64LoweredRound32F,
+ ssa.OpLOONG64LoweredRound64F:
// nothing to do
case ssa.OpLoadReg:
if v.Type.IsFlags() {
p.Reg = v.Args[1].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = loong64.REG_FCC0
+
+ case ssa.OpLOONG64FMADDF,
+ ssa.OpLOONG64FMADDD,
+ ssa.OpLOONG64FMSUBF,
+ ssa.OpLOONG64FMSUBD,
+ ssa.OpLOONG64FNMADDF,
+ ssa.OpLOONG64FNMADDD,
+ ssa.OpLOONG64FNMSUBF,
+ ssa.OpLOONG64FNMSUBD:
+ p := s.Prog(v.Op.Asm())
+ // r=(FMA x y z) -> FMADDD z, y, x, r
+ // the SSA operand order is for taking advantage of
+ // commutativity (that only applies for the first two operands)
+ r := v.Reg()
+ x := v.Args[0].Reg()
+ y := v.Args[1].Reg()
+ z := v.Args[2].Reg()
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = z
+ p.Reg = y
+ p.AddRestSourceReg(x)
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = r
+
case ssa.OpLOONG64MOVVaddr:
p := s.Prog(loong64.AMOVV)
p.From.Type = obj.TYPE_ADDR
(CvtBoolToUint8 ...) => (Copy ...)
-(Round(32|64)F ...) => (Copy ...)
+(Round(32|64)F ...) => (LoweredRound(32|64)F ...)
// comparisons
(Eq8 x y) => (SGTU (MOVVconst [1]) (XOR (ZeroExt8to64 x) (ZeroExt8to64 y)))
(REMVU _ (MOVVconst [1])) => (MOVVconst [0]) // mod
(REMVU x (MOVVconst [c])) && isPowerOfTwo(c) => (ANDconst [c-1] x) // mod
+// FMA
+(FMA ...) => (FMADDD ...)
+((ADD|SUB)F (MULF x y) z) && z.Block.Func.useFMA(v) => (FM(ADD|SUB)F x y z)
+((ADD|SUB)D (MULD x y) z) && z.Block.Func.useFMA(v) => (FM(ADD|SUB)D x y z)
+// z - xy -> -(xy - z)
+(SUBF z (MULF x y)) && z.Block.Func.useFMA(v) => (FNMSUBF x y z)
+(SUBD z (MULD x y)) && z.Block.Func.useFMA(v) => (FNMSUBD x y z)
+// z + (-xy) -> -(xy - z)
+// z - (-xy) -> xy + z
+((ADD|SUB)F z (NEGF (MULF x y))) && z.Block.Func.useFMA(v) => (F(NMSUB|MADD)F x y z)
+((ADD|SUB)D z (NEGD (MULD x y))) && z.Block.Func.useFMA(v) => (F(NMSUB|MADD)D x y z)
+// -xy - z -> -(xy + z)
+(SUBF (NEGF (MULF x y)) z) && z.Block.Func.useFMA(v) => (FNMADDF x y z)
+(SUBD (NEGD (MULD x y)) z) && z.Block.Func.useFMA(v) => (FNMADDD x y z)
+
// generic simplifications
(ADDV x (NEGV y)) => (SUBV x y)
(SUBV x x) => (MOVVconst [0])
fp01 = regInfo{inputs: nil, outputs: []regMask{fp}}
fp11 = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
+ fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
fp2flags = regInfo{inputs: []regMask{fp, fp}}
fpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}}
fp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{fp}}
{name: "NOR", argLength: 2, reg: gp21, asm: "NOR", commutative: true}, // ^(arg0 | arg1)
{name: "NORconst", argLength: 1, reg: gp11, asm: "NOR", aux: "Int64"}, // ^(arg0 | auxInt)
+ {name: "FMADDF", argLength: 3, reg: fp31, asm: "FMADDF", commutative: true, typ: "Float32"}, // (arg0 * arg1) + arg2
+ {name: "FMADDD", argLength: 3, reg: fp31, asm: "FMADDD", commutative: true, typ: "Float64"}, // (arg0 * arg1) + arg2
+ {name: "FMSUBF", argLength: 3, reg: fp31, asm: "FMSUBF", commutative: true, typ: "Float32"}, // (arg0 * arg1) - arg2
+ {name: "FMSUBD", argLength: 3, reg: fp31, asm: "FMSUBD", commutative: true, typ: "Float64"}, // (arg0 * arg1) - arg2
+ {name: "FNMADDF", argLength: 3, reg: fp31, asm: "FNMADDF", commutative: true, typ: "Float32"}, // -((arg0 * arg1) + arg2)
+ {name: "FNMADDD", argLength: 3, reg: fp31, asm: "FNMADDD", commutative: true, typ: "Float64"}, // -((arg0 * arg1) + arg2)
+ {name: "FNMSUBF", argLength: 3, reg: fp31, asm: "FNMSUBF", commutative: true, typ: "Float32"}, // -((arg0 * arg1) - arg2)
+ {name: "FNMSUBD", argLength: 3, reg: fp31, asm: "FNMSUBD", commutative: true, typ: "Float64"}, // -((arg0 * arg1) - arg2)
+
{name: "NEGV", argLength: 1, reg: gp11}, // -arg0
{name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32
{name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64
{name: "MOVFD", argLength: 1, reg: fp11, asm: "MOVFD"}, // float32 -> float64
{name: "MOVDF", argLength: 1, reg: fp11, asm: "MOVDF"}, // float64 -> float32
+ // Round ops to block fused-multiply-add extraction.
+ {name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true},
+ {name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true},
+
// function calls
{name: "CALLstatic", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true}, // call static function aux.(*obj.LSym). last arg=mem, auxint=argsize, returns mem
{name: "CALLtail", argLength: -1, reg: regInfo{clobbers: callerSave}, aux: "CallOff", clobberFlags: true, call: true, tailCall: true}, // tail call static function aux.(*obj.LSym). last arg=mem, auxint=argsize, returns mem
OpLOONG64XORconst
OpLOONG64NOR
OpLOONG64NORconst
+ OpLOONG64FMADDF
+ OpLOONG64FMADDD
+ OpLOONG64FMSUBF
+ OpLOONG64FMSUBD
+ OpLOONG64FNMADDF
+ OpLOONG64FNMADDD
+ OpLOONG64FNMSUBF
+ OpLOONG64FNMSUBD
OpLOONG64NEGV
OpLOONG64NEGF
OpLOONG64NEGD
OpLOONG64TRUNCDV
OpLOONG64MOVFD
OpLOONG64MOVDF
+ OpLOONG64LoweredRound32F
+ OpLOONG64LoweredRound64F
OpLOONG64CALLstatic
OpLOONG64CALLtail
OpLOONG64CALLclosure
},
},
},
+ {
+ name: "FMADDF",
+ argLen: 3,
+ commutative: true,
+ asm: loong64.AFMADDF,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {2, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "FMADDD",
+ argLen: 3,
+ commutative: true,
+ asm: loong64.AFMADDD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {2, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "FMSUBF",
+ argLen: 3,
+ commutative: true,
+ asm: loong64.AFMSUBF,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {2, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "FMSUBD",
+ argLen: 3,
+ commutative: true,
+ asm: loong64.AFMSUBD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {2, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "FNMADDF",
+ argLen: 3,
+ commutative: true,
+ asm: loong64.AFNMADDF,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {2, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "FNMADDD",
+ argLen: 3,
+ commutative: true,
+ asm: loong64.AFNMADDD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {2, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "FNMSUBF",
+ argLen: 3,
+ commutative: true,
+ asm: loong64.AFNMSUBF,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {2, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "FNMSUBD",
+ argLen: 3,
+ commutative: true,
+ asm: loong64.AFNMSUBD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {1, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ {2, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
{
name: "NEGV",
argLen: 1,
},
},
},
+ {
+ name: "LoweredRound32F",
+ argLen: 1,
+ resultInArg0: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "LoweredRound64F",
+ argLen: 1,
+ resultInArg0: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ outputs: []outputInfo{
+ {0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
{
name: "CALLstatic",
auxType: auxCallOff,
return rewriteValueLOONG64_OpEqB(v)
case OpEqPtr:
return rewriteValueLOONG64_OpEqPtr(v)
+ case OpFMA:
+ v.Op = OpLOONG64FMADDD
+ return true
case OpGetCallerPC:
v.Op = OpLOONG64LoweredGetCallerPC
return true
return rewriteValueLOONG64_OpIsNonNil(v)
case OpIsSliceInBounds:
return rewriteValueLOONG64_OpIsSliceInBounds(v)
+ case OpLOONG64ADDD:
+ return rewriteValueLOONG64_OpLOONG64ADDD(v)
+ case OpLOONG64ADDF:
+ return rewriteValueLOONG64_OpLOONG64ADDF(v)
case OpLOONG64ADDV:
return rewriteValueLOONG64_OpLOONG64ADDV(v)
case OpLOONG64ADDVconst:
return rewriteValueLOONG64_OpLOONG64SRLV(v)
case OpLOONG64SRLVconst:
return rewriteValueLOONG64_OpLOONG64SRLVconst(v)
+ case OpLOONG64SUBD:
+ return rewriteValueLOONG64_OpLOONG64SUBD(v)
+ case OpLOONG64SUBF:
+ return rewriteValueLOONG64_OpLOONG64SUBF(v)
case OpLOONG64SUBV:
return rewriteValueLOONG64_OpLOONG64SUBV(v)
case OpLOONG64SUBVconst:
case OpRotateLeft8:
return rewriteValueLOONG64_OpRotateLeft8(v)
case OpRound32F:
- v.Op = OpCopy
+ v.Op = OpLOONG64LoweredRound32F
return true
case OpRound64F:
- v.Op = OpCopy
+ v.Op = OpLOONG64LoweredRound64F
return true
case OpRsh16Ux16:
return rewriteValueLOONG64_OpRsh16Ux16(v)
return true
}
}
+func rewriteValueLOONG64_OpLOONG64ADDD(v *Value) bool { // Fuses an ADDD fed by a MULD (optionally wrapped in NEGD) into FMADDD/FNMSUBD; returns true only when v was rewritten.
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (ADDD (MULD x y) z)
+ // cond: z.Block.Func.useFMA(v)
+ // result: (FMADDD x y z)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { // two passes: the post statement swaps v_0/v_1, so the MULD is accepted as either addend
+ if v_0.Op != OpLOONG64MULD {
+ continue
+ }
+ y := v_0.Args[1]
+ x := v_0.Args[0]
+ z := v_1
+ if !(z.Block.Func.useFMA(v)) { // keep the separate multiply and add when useFMA declines this value
+ continue
+ }
+ v.reset(OpLOONG64FMADDD)
+ v.AddArg3(x, y, z) // (x * y) + z
+ return true
+ }
+ break // neither operand order matched; fall through to the next rule
+ }
+ // match: (ADDD z (NEGD (MULD x y)))
+ // cond: z.Block.Func.useFMA(v)
+ // result: (FNMSUBD x y z)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { // again tries both operand orders of the commutative add
+ z := v_0
+ if v_1.Op != OpLOONG64NEGD {
+ continue
+ }
+ v_1_0 := v_1.Args[0] // operand of the negation; must itself be the multiply
+ if v_1_0.Op != OpLOONG64MULD {
+ continue
+ }
+ y := v_1_0.Args[1]
+ x := v_1_0.Args[0]
+ if !(z.Block.Func.useFMA(v)) {
+ continue
+ }
+ v.reset(OpLOONG64FNMSUBD)
+ v.AddArg3(x, y, z) // z + (-(x * y)) expressed as -((x * y) - z)
+ return true
+ }
+ break
+ }
+ return false // no rule applied; v is unchanged
+}
+func rewriteValueLOONG64_OpLOONG64ADDF(v *Value) bool { // Fuses an ADDF fed by a MULF (optionally wrapped in NEGF) into FMADDF/FNMSUBF; returns true only when v was rewritten.
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (ADDF (MULF x y) z)
+ // cond: z.Block.Func.useFMA(v)
+ // result: (FMADDF x y z)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { // two passes: the post statement swaps v_0/v_1, so the MULF is accepted as either addend
+ if v_0.Op != OpLOONG64MULF {
+ continue
+ }
+ y := v_0.Args[1]
+ x := v_0.Args[0]
+ z := v_1
+ if !(z.Block.Func.useFMA(v)) { // keep the separate multiply and add when useFMA declines this value
+ continue
+ }
+ v.reset(OpLOONG64FMADDF)
+ v.AddArg3(x, y, z) // (x * y) + z
+ return true
+ }
+ break // neither operand order matched; fall through to the next rule
+ }
+ // match: (ADDF z (NEGF (MULF x y)))
+ // cond: z.Block.Func.useFMA(v)
+ // result: (FNMSUBF x y z)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { // again tries both operand orders of the commutative add
+ z := v_0
+ if v_1.Op != OpLOONG64NEGF {
+ continue
+ }
+ v_1_0 := v_1.Args[0] // operand of the negation; must itself be the multiply
+ if v_1_0.Op != OpLOONG64MULF {
+ continue
+ }
+ y := v_1_0.Args[1]
+ x := v_1_0.Args[0]
+ if !(z.Block.Func.useFMA(v)) {
+ continue
+ }
+ v.reset(OpLOONG64FNMSUBF)
+ v.AddArg3(x, y, z) // z + (-(x * y)) expressed as -((x * y) - z)
+ return true
+ }
+ break
+ }
+ return false // no rule applied; v is unchanged
+}
func rewriteValueLOONG64_OpLOONG64ADDV(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
}
return false
}
+func rewriteValueLOONG64_OpLOONG64SUBD(v *Value) bool { // Fuses a SUBD with a MULD operand (optionally wrapped in NEGD) into one of FMSUBD/FNMSUBD/FMADDD/FNMADDD; returns true only when v was rewritten.
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (SUBD (MULD x y) z)
+ // cond: z.Block.Func.useFMA(v)
+ // result: (FMSUBD x y z)
+ for { // single-pass block: break means this rule did not match (subtraction is not commutative, so operands are not swapped)
+ if v_0.Op != OpLOONG64MULD {
+ break
+ }
+ y := v_0.Args[1]
+ x := v_0.Args[0]
+ z := v_1
+ if !(z.Block.Func.useFMA(v)) { // keep the separate multiply and subtract when useFMA declines this value
+ break
+ }
+ v.reset(OpLOONG64FMSUBD)
+ v.AddArg3(x, y, z) // (x * y) - z
+ return true
+ }
+ // match: (SUBD z (MULD x y))
+ // cond: z.Block.Func.useFMA(v)
+ // result: (FNMSUBD x y z)
+ for {
+ z := v_0
+ if v_1.Op != OpLOONG64MULD {
+ break
+ }
+ y := v_1.Args[1]
+ x := v_1.Args[0]
+ if !(z.Block.Func.useFMA(v)) {
+ break
+ }
+ v.reset(OpLOONG64FNMSUBD)
+ v.AddArg3(x, y, z) // z - (x * y) expressed as -((x * y) - z)
+ return true
+ }
+ // match: (SUBD z (NEGD (MULD x y)))
+ // cond: z.Block.Func.useFMA(v)
+ // result: (FMADDD x y z)
+ for {
+ z := v_0
+ if v_1.Op != OpLOONG64NEGD {
+ break
+ }
+ v_1_0 := v_1.Args[0] // operand of the negation; must itself be the multiply
+ if v_1_0.Op != OpLOONG64MULD {
+ break
+ }
+ y := v_1_0.Args[1]
+ x := v_1_0.Args[0]
+ if !(z.Block.Func.useFMA(v)) {
+ break
+ }
+ v.reset(OpLOONG64FMADDD)
+ v.AddArg3(x, y, z) // z - (-(x * y)) expressed as (x * y) + z
+ return true
+ }
+ // match: (SUBD (NEGD (MULD x y)) z)
+ // cond: z.Block.Func.useFMA(v)
+ // result: (FNMADDD x y z)
+ for {
+ if v_0.Op != OpLOONG64NEGD {
+ break
+ }
+ v_0_0 := v_0.Args[0] // operand of the negation; must itself be the multiply
+ if v_0_0.Op != OpLOONG64MULD {
+ break
+ }
+ y := v_0_0.Args[1]
+ x := v_0_0.Args[0]
+ z := v_1
+ if !(z.Block.Func.useFMA(v)) {
+ break
+ }
+ v.reset(OpLOONG64FNMADDD)
+ v.AddArg3(x, y, z) // -(x * y) - z expressed as -((x * y) + z)
+ return true
+ }
+ return false // no rule applied; v is unchanged
+}
+func rewriteValueLOONG64_OpLOONG64SUBF(v *Value) bool { // Fuses a SUBF with a MULF operand (optionally wrapped in NEGF) into one of FMSUBF/FNMSUBF/FMADDF/FNMADDF; returns true only when v was rewritten.
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (SUBF (MULF x y) z)
+ // cond: z.Block.Func.useFMA(v)
+ // result: (FMSUBF x y z)
+ for { // single-pass block: break means this rule did not match (subtraction is not commutative, so operands are not swapped)
+ if v_0.Op != OpLOONG64MULF {
+ break
+ }
+ y := v_0.Args[1]
+ x := v_0.Args[0]
+ z := v_1
+ if !(z.Block.Func.useFMA(v)) { // keep the separate multiply and subtract when useFMA declines this value
+ break
+ }
+ v.reset(OpLOONG64FMSUBF)
+ v.AddArg3(x, y, z) // (x * y) - z
+ return true
+ }
+ // match: (SUBF z (MULF x y))
+ // cond: z.Block.Func.useFMA(v)
+ // result: (FNMSUBF x y z)
+ for {
+ z := v_0
+ if v_1.Op != OpLOONG64MULF {
+ break
+ }
+ y := v_1.Args[1]
+ x := v_1.Args[0]
+ if !(z.Block.Func.useFMA(v)) {
+ break
+ }
+ v.reset(OpLOONG64FNMSUBF)
+ v.AddArg3(x, y, z) // z - (x * y) expressed as -((x * y) - z)
+ return true
+ }
+ // match: (SUBF z (NEGF (MULF x y)))
+ // cond: z.Block.Func.useFMA(v)
+ // result: (FMADDF x y z)
+ for {
+ z := v_0
+ if v_1.Op != OpLOONG64NEGF {
+ break
+ }
+ v_1_0 := v_1.Args[0] // operand of the negation; must itself be the multiply
+ if v_1_0.Op != OpLOONG64MULF {
+ break
+ }
+ y := v_1_0.Args[1]
+ x := v_1_0.Args[0]
+ if !(z.Block.Func.useFMA(v)) {
+ break
+ }
+ v.reset(OpLOONG64FMADDF)
+ v.AddArg3(x, y, z) // z - (-(x * y)) expressed as (x * y) + z
+ return true
+ }
+ // match: (SUBF (NEGF (MULF x y)) z)
+ // cond: z.Block.Func.useFMA(v)
+ // result: (FNMADDF x y z)
+ for {
+ if v_0.Op != OpLOONG64NEGF {
+ break
+ }
+ v_0_0 := v_0.Args[0] // operand of the negation; must itself be the multiply
+ if v_0_0.Op != OpLOONG64MULF {
+ break
+ }
+ y := v_0_0.Args[1]
+ x := v_0_0.Args[0]
+ z := v_1
+ if !(z.Block.Func.useFMA(v)) {
+ break
+ }
+ v.reset(OpLOONG64FNMADDF)
+ v.AddArg3(x, y, z) // -(x * y) - z expressed as -((x * y) + z)
+ return true
+ }
+ return false // no rule applied; v is unchanged
+}
func rewriteValueLOONG64_OpLOONG64SUBV(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
},
- sys.ARM64, sys.PPC64, sys.RISCV64, sys.S390X)
+ sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64, sys.S390X)
addF("math", "FMA",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
if !s.config.UseFMA {
{"loong64", "internal/runtime/sys", "Len8"}: struct{}{},
{"loong64", "math", "Abs"}: struct{}{},
{"loong64", "math", "Copysign"}: struct{}{},
+ {"loong64", "math", "FMA"}: struct{}{},
{"loong64", "math", "sqrt"}: struct{}{},
{"loong64", "math/big", "mulWW"}: struct{}{},
{"loong64", "math/bits", "Add"}: struct{}{},
// s390x:"FMADDS\t"
// ppc64x:"FMADDS\t"
// arm64:"FMADDS"
+ // loong64:"FMADDF\t"
// riscv64:"FMADDS\t"
return x*y + z
}
// s390x:"FMSUBS\t"
// ppc64x:"FMSUBS\t"
// riscv64:"FMSUBS\t"
+ // loong64:"FMSUBF\t"
return x*y - z
}
func FusedSub32_b(x, y, z float32) float32 {
// arm64:"FMSUBS"
+ // loong64:"FNMSUBF\t"
// riscv64:"FNMSUBS\t"
return z - x*y
}
// s390x:"FMADD\t"
// ppc64x:"FMADD\t"
// arm64:"FMADDD"
+ // loong64:"FMADDD\t"
// riscv64:"FMADDD\t"
return x*y + z
}
// s390x:"FMSUB\t"
// ppc64x:"FMSUB\t"
// riscv64:"FMSUBD\t"
+ // loong64:"FMSUBD\t"
return x*y - z
}
func FusedSub64_b(x, y, z float64) float64 {
// arm64:"FMSUBD"
+ // loong64:"FNMSUBD\t"
// riscv64:"FNMSUBD\t"
return z - x*y
}
// amd64:"VFMADD231SD"
// arm/6:"FMULAD"
// arm64:"FMADDD"
+ // loong64:"FMADDD"
// s390x:"FMADD"
// ppc64x:"FMADD"
// riscv64:"FMADDD"