Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: fix FP accuracy issue introduced by FMA optimization on ARM64
author Ben Shi <powerman1st@163.com>
Thu, 22 Feb 2018 13:55:01 +0000 (13:55 +0000)
committer Cherry Zhang <cherryyz@google.com>
Thu, 22 Feb 2018 15:28:08 +0000 (15:28 +0000)
Two ARM64 rules are added to avoid an FP accuracy issue that causes a
build failure.
https://build.golang.org/log/1360f5c9ef3f37968216350283c1013e9681725d

fixes #24033

Change-Id: I9b74b584ab5cc53fa49476de275dc549adf97610
Reviewed-on: https://go-review.googlesource.com/96355
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM64.go

index 795b1a74c5fb42949c7c36f34c5da138baaeff00..014e7fc57c2006569a72baa32672121d6becb603 100644 (file)
@@ -590,6 +590,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.From.Reg = v.Args[0].Reg()
                p.To.Type = obj.TYPE_REG
                p.To.Reg = v.Reg()
+       case ssa.OpARM64LoweredRound32F, ssa.OpARM64LoweredRound64F:
+               // input is already rounded
        case ssa.OpARM64VCNT:
                p := s.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_REG
index 3822a378be6f7b1c15aaed4cefb0191806d59687..5b4d8b04f98a5a4da746007b7b8b0251cbd230f1 100644 (file)
 (Cvt32Fto64F x) -> (FCVTSD x)
 (Cvt64Fto32F x) -> (FCVTDS x)
 
-(Round32F x) -> x
-(Round64F x) -> x
+(Round32F x) -> (LoweredRound32F x)
+(Round64F x) -> (LoweredRound64F x)
 
 // comparisons
 (Eq8 x y)  -> (Equal (CMPW (ZeroExt8to32 x) (ZeroExt8to32 y)))
index 1d70c4e8648a28c13fc6a01753a7aafb4e055b4b..d712988bec4c5164ca46190b5298f409eb6ef8d4 100644 (file)
@@ -216,6 +216,8 @@ func init() {
                {name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"},       // count leading zero, 32-bit
                {name: "VCNT", argLength: 1, reg: fp11, asm: "VCNT"},       // count set bits for each 8-bit unit and store the result in each 8-bit unit
                {name: "VUADDLV", argLength: 1, reg: fp11, asm: "VUADDLV"}, // unsigned sum of eight bytes in a 64-bit value, zero extended to 64-bit.
+               {name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true},
+               {name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true},
 
                // 3-operand, the addend comes first
                {name: "FMADDS", argLength: 3, reg: fp31, asm: "FMADDS"},   // +arg0 + (arg1 * arg2)
index 5131e8d834d00b804862719b9b4e0b0ab2019396..09008d30328857af7cc172e7b4b80f35c0fc29c6 100644 (file)
@@ -1003,6 +1003,8 @@ const (
        OpARM64CLZW
        OpARM64VCNT
        OpARM64VUADDLV
+       OpARM64LoweredRound32F
+       OpARM64LoweredRound64F
        OpARM64FMADDS
        OpARM64FMADDD
        OpARM64FNMADDS
@@ -12765,6 +12767,32 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "LoweredRound32F",
+               argLen:       1,
+               resultInArg0: true,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+                       outputs: []outputInfo{
+                               {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+               },
+       },
+       {
+               name:         "LoweredRound64F",
+               argLen:       1,
+               resultInArg0: true,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+                       outputs: []outputInfo{
+                               {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+               },
+       },
        {
                name:   "FMADDS",
                argLen: 3,
index f711aade36618e9baf14760f09d6eda07b941f17..306b1339ee536e7029537628ace651c6efc8bfc9 100644 (file)
@@ -16318,11 +16318,10 @@ func rewriteValueARM64_OpRound_0(v *Value) bool {
 func rewriteValueARM64_OpRound32F_0(v *Value) bool {
        // match: (Round32F x)
        // cond:
-       // result: x
+       // result: (LoweredRound32F x)
        for {
                x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpARM64LoweredRound32F)
                v.AddArg(x)
                return true
        }
@@ -16330,11 +16329,10 @@ func rewriteValueARM64_OpRound32F_0(v *Value) bool {
 func rewriteValueARM64_OpRound64F_0(v *Value) bool {
        // match: (Round64F x)
        // cond:
-       // result: x
+       // result: (LoweredRound64F x)
        for {
                x := v.Args[0]
-               v.reset(OpCopy)
-               v.Type = x.Type
+               v.reset(OpARM64LoweredRound64F)
                v.AddArg(x)
                return true
        }