Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: add fma intrinsic for arm
author: smasher164 <aindurti@gmail.com>
Mon, 15 Oct 2018 07:14:57 +0000 (03:14 -0400)
committer: Keith Randall <khr@golang.org>
Mon, 21 Oct 2019 17:42:47 +0000 (17:42 +0000)
This change introduces an arm intrinsic that generates the FMULAD
instruction for the fused-multiply-add operation on systems that
support it. System support is detected via cpu.ARM.HasVFPv4. A rewrite
rule translates the generic intrinsic to FMULAD.

Updates #25819.

Change-Id: I8459e5dd1cdbdca35f88a78dbeb7d387f1e20efa
Reviewed-on: https://go-review.googlesource.com/c/go/+/142117
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
12 files changed:
src/cmd/compile/internal/arm/ssa.go
src/cmd/compile/internal/gc/builtin.go
src/cmd/compile/internal/gc/builtin/runtime.go
src/cmd/compile/internal/gc/go.go
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ssa/gen/ARM.rules
src/cmd/compile/internal/ssa/gen/ARMOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM.go
src/runtime/cpuflags.go
src/runtime/proc.go
test/codegen/math.go

index 2c77912f215aff5fc31b5184d63cbab5d52d875b..e20a72cfc8e0b7b8139150c1c10b9e618e05296f 100644 (file)
@@ -226,7 +226,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.To.Reg = r
        case ssa.OpARMSRR:
                genregshift(s, arm.AMOVW, 0, v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm.SHIFT_RR)
-       case ssa.OpARMMULAF, ssa.OpARMMULAD, ssa.OpARMMULSF, ssa.OpARMMULSD:
+       case ssa.OpARMMULAF, ssa.OpARMMULAD, ssa.OpARMMULSF, ssa.OpARMMULSD, ssa.OpARMFMULAD:
                r := v.Reg()
                r0 := v.Args[0].Reg()
                r1 := v.Args[1].Reg()
index c7cd5fae64f0eb95948295c0bc8277a40be4a98a..ab65696a09194fc3db89ee4eebeea0e3b84ffdf2 100644 (file)
@@ -186,6 +186,7 @@ var runtimeDecls = [...]struct {
        {"x86HasPOPCNT", varTag, 15},
        {"x86HasSSE41", varTag, 15},
        {"x86HasFMA", varTag, 15},
+       {"armHasVFPv4", varTag, 15},
        {"arm64HasATOMICS", varTag, 15},
 }
 
index d9eaa0b7e5701125c9171df8e58ab258c0f64f1a..10a2241597fdf162e317a0a9c38bf60f469b1f19 100644 (file)
@@ -242,4 +242,5 @@ func checkptrArithmetic(unsafe.Pointer, []unsafe.Pointer)
 var x86HasPOPCNT bool
 var x86HasSSE41 bool
 var x86HasFMA bool
+var armHasVFPv4 bool
 var arm64HasATOMICS bool
index bd10ca047fcb7ca2260f302a7420575eb8554175..d05f754f30b5b6d84d000703d9dbc2838cefa6ef 100644 (file)
@@ -312,6 +312,7 @@ var (
        x86HasPOPCNT,
        x86HasSSE41,
        x86HasFMA,
+       armHasVFPv4,
        arm64HasATOMICS,
        typedmemclr,
        typedmemmove,
index b65882e032b047cc2e358e8f0b8b9b10376083a9..bbedbbc5aaa2572f191687c126957a9ad5585e36 100644 (file)
@@ -92,6 +92,7 @@ func initssaconfig() {
        x86HasPOPCNT = sysvar("x86HasPOPCNT")       // bool
        x86HasSSE41 = sysvar("x86HasSSE41")         // bool
        x86HasFMA = sysvar("x86HasFMA")             // bool
+       armHasVFPv4 = sysvar("armHasVFPv4")         // bool
        arm64HasATOMICS = sysvar("arm64HasATOMICS") // bool
        typedmemclr = sysfunc("typedmemclr")
        typedmemmove = sysfunc("typedmemmove")
@@ -3357,6 +3358,36 @@ func init() {
                        return s.variable(n, types.Types[TFLOAT64])
                },
                sys.AMD64)
+       addF("math", "Fma",
+               func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+                       addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), armHasVFPv4, s.sb)
+                       v := s.load(types.Types[TBOOL], addr)
+                       b := s.endBlock()
+                       b.Kind = ssa.BlockIf
+                       b.SetControl(v)
+                       bTrue := s.f.NewBlock(ssa.BlockPlain)
+                       bFalse := s.f.NewBlock(ssa.BlockPlain)
+                       bEnd := s.f.NewBlock(ssa.BlockPlain)
+                       b.AddEdgeTo(bTrue)
+                       b.AddEdgeTo(bFalse)
+                       b.Likely = ssa.BranchLikely
+
+                       // We have the intrinsic - use it directly.
+                       s.startBlock(bTrue)
+                       s.vars[n] = s.newValue3(ssa.OpFma, types.Types[TFLOAT64], args[0], args[1], args[2])
+                       s.endBlock().AddEdgeTo(bEnd)
+
+                       // Call the pure Go version.
+                       s.startBlock(bFalse)
+                       a := s.call(n, callNormal)
+                       s.vars[n] = s.load(types.Types[TFLOAT64], a)
+                       s.endBlock().AddEdgeTo(bEnd)
+
+                       // Merge results.
+                       s.startBlock(bEnd)
+                       return s.variable(n, types.Types[TFLOAT64])
+               },
+               sys.ARM)
 
        makeRoundAMD64 := func(op ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
index 4ab388cae9c08d44a8e3db900d1a7abb388692cf..c1c73e23ec2f5fa55126efb99cdfbf522306ac81 100644 (file)
 
 (Round(32|64)F x) -> x
 
+// fused-multiply-add
+(Fma x y z) -> (FMULAD z x y)
+
 // comparisons
 (Eq8 x y)  -> (Equal (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
 (Eq16 x y) -> (Equal (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
index 9795215c8ac06736986b55f008abffd0c4a37429..bde170864de86e41ef67faf59906f4698dcd0f7e 100644 (file)
@@ -192,6 +192,10 @@ func init() {
                {name: "MULSF", argLength: 3, reg: fp31, asm: "MULSF", resultInArg0: true}, // arg0 - (arg1 * arg2)
                {name: "MULSD", argLength: 3, reg: fp31, asm: "MULSD", resultInArg0: true}, // arg0 - (arg1 * arg2)
 
+               // FMULAD only exists on platforms with the VFPv4 instruction set.
+               // Any use must be preceded by a successful check of runtime.armHasVFPv4.
+               {name: "FMULAD", argLength: 3, reg: fp31, asm: "FMULAD", resultInArg0: true}, // arg0 + (arg1 * arg2)
+
                {name: "AND", argLength: 2, reg: gp21, asm: "AND", commutative: true}, // arg0 & arg1
                {name: "ANDconst", argLength: 1, reg: gp11, asm: "AND", aux: "Int32"}, // arg0 & auxInt
                {name: "OR", argLength: 2, reg: gp21, asm: "ORR", commutative: true},  // arg0 | arg1
index 59740da9a4c520a29579b72b4967b3d9f92b18cb..1bac3919144a0e9b258f20fab0d4bc87db9c5bcd 100644 (file)
@@ -925,6 +925,7 @@ const (
        OpARMMULAD
        OpARMMULSF
        OpARMMULSD
+       OpARMFMULAD
        OpARMAND
        OpARMANDconst
        OpARMOR
@@ -12119,6 +12120,22 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "FMULAD",
+               argLen:       3,
+               resultInArg0: true,
+               asm:          arm.AFMULAD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {2, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+                       outputs: []outputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+               },
+       },
        {
                name:        "AND",
                argLen:      2,
index ece2fe4fe9c203b1467d3b963c713448706ce118..24064bdefb797337f5cc7476c61169c164772230 100644 (file)
@@ -538,6 +538,8 @@ func rewriteValueARM(v *Value) bool {
                return rewriteValueARM_OpEqB_0(v)
        case OpEqPtr:
                return rewriteValueARM_OpEqPtr_0(v)
+       case OpFma:
+               return rewriteValueARM_OpFma_0(v)
        case OpGeq16:
                return rewriteValueARM_OpGeq16_0(v)
        case OpGeq16U:
@@ -17159,6 +17161,21 @@ func rewriteValueARM_OpEqPtr_0(v *Value) bool {
                return true
        }
 }
+func rewriteValueARM_OpFma_0(v *Value) bool {
+       // match: (Fma x y z)
+       // cond:
+       // result: (FMULAD z x y)
+       for {
+               z := v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v.reset(OpARMFMULAD)
+               v.AddArg(z)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+}
 func rewriteValueARM_OpGeq16_0(v *Value) bool {
        b := v.Block
        typ := &b.Func.Config.Types
index 3e859a3516180c3e9afd50629e30c6f3842919bb..94f9331d15acad0b663e6d380eb542835376df2c 100644 (file)
@@ -25,5 +25,7 @@ var (
        x86HasSSE41  bool
        x86HasFMA    bool
 
+       armHasVFPv4 bool
+
        arm64HasATOMICS bool
 )
index 1a51b1d83b03e368ec362df32abde2116f3a4e16..71e756b991c1e73b09ea63c89cca8019a36ea96a 100644 (file)
@@ -516,6 +516,8 @@ func cpuinit() {
        x86HasSSE41 = cpu.X86.HasSSE41
        x86HasFMA = cpu.X86.HasFMA
 
+       armHasVFPv4 = cpu.ARM.HasVFPv4
+
        arm64HasATOMICS = cpu.ARM64.HasATOMICS
 }
 
index c942085480598d9e6da9cc0b99ae5bdc1e88cca6..751406d732e39c0eacb56696366b8323b07b01fc 100644 (file)
@@ -109,6 +109,7 @@ func copysign(a, b, c float64) {
 
 func fma(x, y, z float64) float64 {
        // amd64:"VFMADD231SD"
+       // arm/6:"FMULAD"
        // arm64:"FMADDD"
        // s390x:"FMADD"
        // ppc64:"FMADD"