p.To.Reg = r
case ssa.OpARMSRR:
genregshift(s, arm.AMOVW, 0, v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_RR)
- case ssa.OpARMMULAF, ssa.OpARMMULAD, ssa.OpARMMULSF, ssa.OpARMMULSD:
+ case ssa.OpARMMULAF, ssa.OpARMMULAD, ssa.OpARMMULSF, ssa.OpARMMULSD, ssa.OpARMFMULAD:
r := v.Reg()
r0 := v.Args[0].Reg()
r1 := v.Args[1].Reg()
{"x86HasPOPCNT", varTag, 15},
{"x86HasSSE41", varTag, 15},
{"x86HasFMA", varTag, 15},
+ {"armHasVFPv4", varTag, 15},
{"arm64HasATOMICS", varTag, 15},
}
var x86HasPOPCNT bool
var x86HasSSE41 bool
var x86HasFMA bool
+var armHasVFPv4 bool
var arm64HasATOMICS bool
x86HasPOPCNT,
x86HasSSE41,
x86HasFMA,
+ armHasVFPv4,
arm64HasATOMICS,
typedmemclr,
typedmemmove,
x86HasPOPCNT = sysvar("x86HasPOPCNT") // bool
x86HasSSE41 = sysvar("x86HasSSE41") // bool
x86HasFMA = sysvar("x86HasFMA") // bool
+ armHasVFPv4 = sysvar("armHasVFPv4") // bool
arm64HasATOMICS = sysvar("arm64HasATOMICS") // bool
typedmemclr = sysfunc("typedmemclr")
typedmemmove = sysfunc("typedmemmove")
return s.variable(n, types.Types[TFLOAT64])
},
sys.AMD64)
+ addF("math", "Fma", // intrinsic builder for math.Fma; the trailing sys.ARM argument scopes it to ARM
+ func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+ addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), armHasVFPv4, s.sb) // *bool address of the armHasVFPv4 symbol, based off s.sb
+ v := s.load(types.Types[TBOOL], addr) // run-time value of the flag
+ b := s.endBlock() // close the current block so it can become the branch
+ b.Kind = ssa.BlockIf
+ b.SetControl(v) // branch on the loaded flag
+ bTrue := s.f.NewBlock(ssa.BlockPlain)
+ bFalse := s.f.NewBlock(ssa.BlockPlain)
+ bEnd := s.f.NewBlock(ssa.BlockPlain)
+ b.AddEdgeTo(bTrue) // successor order matters: bTrue is added first, bFalse second
+ b.AddEdgeTo(bFalse)
+ b.Likely = ssa.BranchLikely // branch hint; presumably favours the first successor (bTrue) — confirm against ssa.Block
+
+ // The armHasVFPv4 flag is set: emit the generic Fma op directly on the three arguments.
+ s.startBlock(bTrue)
+ s.vars[n] = s.newValue3(ssa.OpFma, types.Types[TFLOAT64], args[0], args[1], args[2])
+ s.endBlock().AddEdgeTo(bEnd)
+
+ // The flag is clear: make a normal call for n (the pure Go version) and load its float64 result.
+ s.startBlock(bFalse)
+ a := s.call(n, callNormal) // a is used below as the address to load the result from
+ s.vars[n] = s.load(types.Types[TFLOAT64], a)
+ s.endBlock().AddEdgeTo(bEnd)
+
+ // Merge results: both arms stored into s.vars[n], so read that variable back in the join block.
+ s.startBlock(bEnd)
+ return s.variable(n, types.Types[TFLOAT64])
+ },
+ sys.ARM)
makeRoundAMD64 := func(op ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
(Round(32|64)F x) -> x
+// fused-multiply-add
+(Fma x y z) -> (FMULAD z x y)
+
// comparisons
(Eq8 x y) -> (Equal (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
(Eq16 x y) -> (Equal (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
{name: "MULSF", argLength: 3, reg: fp31, asm: "MULSF", resultInArg0: true}, // arg0 - (arg1 * arg2)
{name: "MULSD", argLength: 3, reg: fp31, asm: "MULSD", resultInArg0: true}, // arg0 - (arg1 * arg2)
+ // FMULAD only exists on platforms with the VFPv4 instruction set.
+ // Any use must be preceded by a successful check of runtime.armHasVFPv4.
+ {name: "FMULAD", argLength: 3, reg: fp31, asm: "FMULAD", resultInArg0: true}, // arg0 + (arg1 * arg2)
+
{name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1
{name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int32"}, // arg0 & auxInt
{name: "OR", argLength: 2, reg: gp21, asm: "ORR", commutative: true}, // arg0 | arg1
OpARMMULAD
OpARMMULSF
OpARMMULSD
+ OpARMFMULAD
OpARMAND
OpARMANDconst
OpARMOR
},
},
},
+ {
+ name: "FMULAD",
+ argLen: 3,
+ resultInArg0: true,
+ asm: arm.AFMULAD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+ {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+ {2, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+ },
+ outputs: []outputInfo{
+ {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+ },
+ },
+ },
{
name: "AND",
argLen: 2,
return rewriteValueARM_OpEqB_0(v)
case OpEqPtr:
return rewriteValueARM_OpEqPtr_0(v)
+ case OpFma:
+ return rewriteValueARM_OpFma_0(v)
case OpGeq16:
return rewriteValueARM_OpGeq16_0(v)
case OpGeq16U:
return true
}
}
+func rewriteValueARM_OpFma_0(v *Value) bool { // rewrites v in place from generic Fma to ARM FMULAD; always returns true
+ // match: (Fma x y z)
+ // cond:
+ // result: (FMULAD z x y)
+ for { // the body returns unconditionally, so this runs exactly once
+ z := v.Args[2] // third Fma operand
+ x := v.Args[0]
+ y := v.Args[1]
+ v.reset(OpARMFMULAD) // switch the opcode; the arguments are re-added below
+ v.AddArg(z) // z goes first: FMULAD is documented as arg0 + (arg1 * arg2)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+}
func rewriteValueARM_OpGeq16_0(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types
x86HasSSE41 bool
x86HasFMA bool
+ armHasVFPv4 bool
+
arm64HasATOMICS bool
)
x86HasSSE41 = cpu.X86.HasSSE41
x86HasFMA = cpu.X86.HasFMA
+ armHasVFPv4 = cpu.ARM.HasVFPv4
+
arm64HasATOMICS = cpu.ARM64.HasATOMICS
}
func fma(x, y, z float64) float64 {
// amd64:"VFMADD231SD"
+ // arm/6:"FMULAD"
// arm64:"FMADDD"
// s390x:"FMADD"
// ppc64:"FMADD"