func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
switch v.Op {
+ case ssa.OpAMD64VFMADD231SD:
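+		// VFMADD231SD computes To = From3*From + To (Go assembler operand
+		// order). Filled in below, that is Args[1]*Args[2] + Args[0], with
+		// the addend Args[0] doubling as the destination (resultInArg0),
+		// which the check at the end of this case verifies.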
+ p := s.Prog(v.Op.Asm())
+ p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[2].Reg()}
+ p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
+ p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()})
+ if v.Reg() != v.Args[0].Reg() {
+ v.Fatalf("input[0] and output not in same register %s", v.LongString())
+ }
case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
r := v.Reg()
r1 := v.Args[0].Reg()
{"checkptrArithmetic", funcTag, 122},
{"x86HasPOPCNT", varTag, 15},
{"x86HasSSE41", varTag, 15},
+ {"x86HasFMA", varTag, 15},
{"arm64HasATOMICS", varTag, 15},
}
// architecture variants
var x86HasPOPCNT bool
var x86HasSSE41 bool
+var x86HasFMA bool
var arm64HasATOMICS bool
racewriterange,
x86HasPOPCNT,
x86HasSSE41,
+ x86HasFMA,
arm64HasATOMICS,
typedmemclr,
typedmemmove,
racewriterange = sysfunc("racewriterange")
x86HasPOPCNT = sysvar("x86HasPOPCNT") // bool
x86HasSSE41 = sysvar("x86HasSSE41") // bool
+ x86HasFMA = sysvar("x86HasFMA") // bool
arm64HasATOMICS = sysvar("arm64HasATOMICS") // bool
typedmemclr = sysfunc("typedmemclr")
typedmemmove = sysfunc("typedmemmove")
return s.newValue3(ssa.OpFma, types.Types[TFLOAT64], args[0], args[1], args[2])
},
sys.ARM64, sys.PPC64, sys.S390X)
+ addF("math", "Fma",
+ func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+ addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), x86HasFMA, s.sb)
+ v := s.load(types.Types[TBOOL], addr)
+ b := s.endBlock()
+ b.Kind = ssa.BlockIf
+ b.SetControl(v)
+ bTrue := s.f.NewBlock(ssa.BlockPlain)
+ bFalse := s.f.NewBlock(ssa.BlockPlain)
+ bEnd := s.f.NewBlock(ssa.BlockPlain)
+ b.AddEdgeTo(bTrue)
+ b.AddEdgeTo(bFalse)
+			b.Likely = ssa.BranchLikely // >= Haswell CPUs are common
+
+ // We have the intrinsic - use it directly.
+ s.startBlock(bTrue)
+ s.vars[n] = s.newValue3(ssa.OpFma, types.Types[TFLOAT64], args[0], args[1], args[2])
+ s.endBlock().AddEdgeTo(bEnd)
+
+ // Call the pure Go version.
+ s.startBlock(bFalse)
+ a := s.call(n, callNormal)
+ s.vars[n] = s.load(types.Types[TFLOAT64], a)
+ s.endBlock().AddEdgeTo(bEnd)
+
+ // Merge results.
+ s.startBlock(bEnd)
+ return s.variable(n, types.Types[TFLOAT64])
+ },
+ sys.AMD64)
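
Both arms of the branch above must agree on the result: the VFMADD231SD path and the pure Go fallback each compute x*y + z with a single rounding, which an ordinary multiply followed by an add cannot always reproduce. A minimal standalone sketch of that difference (the inputs are illustrative values chosen here, not taken from the change):

package main

import (
	"fmt"
	"math"
)

func main() {
	// x*y is exactly 1 - 2^-104; a bare float64 multiply rounds that
	// back up to 1, so the unfused expression loses the low bits.
	x := 1.0 + 0x1p-52
	y := 1.0 - 0x1p-52

	fmt.Println(x*y - 1.0)            // two roundings: prints 0
	fmt.Println(math.Fma(x, y, -1.0)) // one rounding: prints -2^-104 (about -4.93e-32)
}
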
makeRoundAMD64 := func(op ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
(Floor x) -> (ROUNDSD [1] x)
(Ceil x) -> (ROUNDSD [2] x)
(Trunc x) -> (ROUNDSD [3] x)
+(Fma x y z) -> (VFMADD231SD z x y)
// Lowering extension
// Note: we always extend to 64 bits even though some ops don't need that many result bits.
fp01 = regInfo{inputs: nil, outputs: fponly}
fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: fponly}
+ fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: fponly}
fp21load = regInfo{inputs: []regMask{fp, gpspsb, 0}, outputs: fponly}
fpgp = regInfo{inputs: fponly, outputs: gponly}
gpfp = regInfo{inputs: gponly, outputs: fponly}
// Any use must be preceded by a successful check of runtime.x86HasSSE41.
{name: "ROUNDSD", argLength: 1, reg: fp11, aux: "Int8", asm: "ROUNDSD"}, // rounds arg0 depending on auxint, 1 means math.Floor, 2 Ceil, 3 Trunc
+ // VFMADD231SD only exists on platforms with the FMA3 instruction set.
+		// Any use must be preceded by a successful check of runtime.x86HasFMA.
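+		// It computes arg0 + arg1*arg2 and requires the result in the same
+		// register as arg0 (resultInArg0), which is why the lowering rule
+		// passes the addend first: (Fma x y z) -> (VFMADD231SD z x y).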
+ {name: "VFMADD231SD", argLength: 3, reg: fp31, resultInArg0: true, asm: "VFMADD231SD"},
+
{name: "SBBQcarrymask", argLength: 1, reg: flagsgp, asm: "SBBQ"}, // (int64)(-1) if carry is set, 0 if carry is clear.
{name: "SBBLcarrymask", argLength: 1, reg: flagsgp, asm: "SBBL"}, // (int32)(-1) if carry is set, 0 if carry is clear.
// Note: SBBW and SBBB are subsumed by SBBL
OpAMD64POPCNTL
OpAMD64SQRTSD
OpAMD64ROUNDSD
+ OpAMD64VFMADD231SD
OpAMD64SBBQcarrymask
OpAMD64SBBLcarrymask
OpAMD64SETEQ
},
},
},
+ {
+ name: "VFMADD231SD",
+ argLen: 3,
+ resultInArg0: true,
+ asm: x86.AVFMADD231SD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ },
+ outputs: []outputInfo{
+ {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ },
+ },
+ },
{
name: "SBBQcarrymask",
argLen: 1,
return rewriteValueAMD64_OpEqPtr_0(v)
case OpFloor:
return rewriteValueAMD64_OpFloor_0(v)
+ case OpFma:
+ return rewriteValueAMD64_OpFma_0(v)
case OpGeq16:
return rewriteValueAMD64_OpGeq16_0(v)
case OpGeq16U:
return true
}
}
+func rewriteValueAMD64_OpFma_0(v *Value) bool {
+ // match: (Fma x y z)
+ // cond:
+ // result: (VFMADD231SD z x y)
+ for {
+ z := v.Args[2]
+ x := v.Args[0]
+ y := v.Args[1]
+ v.reset(OpAMD64VFMADD231SD)
+ v.AddArg(z)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+}
func rewriteValueAMD64_OpGeq16_0(v *Value) bool {
b := v.Block
// match: (Geq16 x y)
// TODO: deprecate these; use internal/cpu directly.
x86HasPOPCNT bool
x86HasSSE41 bool
+ x86HasFMA bool
arm64HasATOMICS bool
)
	// to guard execution of instructions that cannot be assumed to be always supported.
x86HasPOPCNT = cpu.X86.HasPOPCNT
x86HasSSE41 = cpu.X86.HasSSE41
+ x86HasFMA = cpu.X86.HasFMA
arm64HasATOMICS = cpu.ARM64.HasATOMICS
}
}
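
The flag stored here is a copy of internal/cpu's detection, placed where compiler-generated code can load it. User code cannot import internal/cpu, but the public golang.org/x/sys/cpu module (an assumption about the reader's environment, not part of this change) exposes the same feature bit, which makes it easy to see which path a given machine will take:

package main

import (
	"fmt"
	"math"

	"golang.org/x/sys/cpu"
)

func main() {
	// Same feature bit the runtime copies into x86HasFMA, read through
	// the public mirror of internal/cpu.
	if cpu.X86.HasFMA {
		fmt.Println("FMA3 present: math.Fma lowers to a single VFMADD231SD")
	} else {
		fmt.Println("no FMA3: math.Fma takes the pure Go fallback call")
	}
	fmt.Println(math.Fma(2, 3, 4)) // 10 on either path; only the code differs
}
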
func fma(x, y, z float64) float64 {
+ // amd64:"VFMADD231SD"
// arm64:"FMADDD"
// s390x:"FMADD"
// ppc64:"FMADD"