We already intrinsify Floor, Ceil, and Trunc, but RoundToEven was added later. Intrinsify it on AMD64 as well.
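
For reference, RoundToEven rounds halfway cases to the nearest even integer ("banker's rounding"), unlike Floor/Ceil/Trunc. A minimal illustration:

    package main

    import (
        "fmt"
        "math"
    )

    func main() {
        // Halfway cases go to the nearest even integer.
        fmt.Println(math.RoundToEven(0.5)) // 0
        fmt.Println(math.RoundToEven(1.5)) // 2
        fmt.Println(math.RoundToEven(2.5)) // 2
    }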
name           old time/op  new time/op  delta
RoundToEven-8  3.00ns ± 1%  0.68ns ± 2%  -77.34%  (p=0.000 n=10+10)
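
The figure above is benchstat output, presumably from the math package's benchmarks. A minimal sketch of an equivalent measurement with the testing package (the sink variable and input value are illustrative, not the actual benchmark):

    package math_test

    import (
        "math"
        "testing"
    )

    // sink keeps the result live so the call is not optimized away.
    var sink float64

    func BenchmarkRoundToEven(b *testing.B) {
        for i := 0; i < b.N; i++ {
            sink = math.RoundToEven(2.5)
        }
    }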
Change-Id: Ib158cbceb436c6725b2d9353a526c5c4be19bcad
Reviewed-on: https://go-review.googlesource.com/74852
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
Reviewed-by: Keith Randall <khr@golang.org>
case ssa.OpAMD64ROUNDSD:
p := s.Prog(v.Op.Asm())
val := v.AuxInt
- // 1 means math.Floor, 2 Ceil, 3 Trunc
- if val != 1 && val != 2 && val != 3 {
+ // 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
+ if val != 0 && val != 1 && val != 2 && val != 3 {
v.Fatalf("Invalid rounding mode")
}
p.From.Offset = val
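
The AuxInt written into p.From.Offset becomes ROUNDSD's immediate and selects the rounding mode listed in the comment above. Seen from the Go side (a sketch; the comments show the results for x = -2.5):

    package main

    import (
        "fmt"
        "math"
    )

    func main() {
        x := -2.5
        // Each call corresponds to one immediate in the mapping above.
        fmt.Println(math.RoundToEven(x)) // imm 0: -2 (ties to even)
        fmt.Println(math.Floor(x))       // imm 1: -3 (round down)
        fmt.Println(math.Ceil(x))        // imm 2: -2 (round up)
        fmt.Println(math.Trunc(x))       // imm 3: -2 (round toward zero)
    }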
return s.variable(n, types.Types[TFLOAT64])
}
}
+ addF("math", "RoundToEven",
+ makeRoundAMD64(ssa.OpRoundToEven),
+ sys.AMD64)
addF("math", "Floor",
makeRoundAMD64(ssa.OpFloor),
sys.AMD64)
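
ROUNDSD requires SSE4.1, so the intrinsic builder whose tail appears above only emits it when the CPU supports SSE4.1 and otherwise falls back to the regular math routine; the s.variable call reads back whichever result was produced. Conceptually the lowered code behaves like this sketch (the flag and helper names are assumptions, not the compiler's identifiers):

    package main

    import (
        "fmt"
        "math"
    )

    // hasSSE41 stands in for the CPU-feature flag the generated code
    // checks at run time.
    var hasSSE41 = false

    // roundToEvenHW stands in for the single ROUNDSD $0 instruction on
    // the fast path; here it is only a stub.
    func roundToEvenHW(x float64) float64 { return math.RoundToEven(x) }

    func roundToEven(x float64) float64 {
        if hasSSE41 {
            return roundToEvenHW(x) // fast path: one instruction
        }
        return math.RoundToEven(x) // portable fallback
    }

    func main() {
        fmt.Println(roundToEven(1.5)) // 2
    }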
(Sqrt x) -> (SQRTSD x)
-(Floor x) -> (ROUNDSD [1] x)
-(Ceil x) -> (ROUNDSD [2] x)
-(Trunc x) -> (ROUNDSD [3] x)
+(RoundToEven x) -> (ROUNDSD [0] x)
+(Floor x) -> (ROUNDSD [1] x)
+(Ceil x) -> (ROUNDSD [2] x)
+(Trunc x) -> (ROUNDSD [3] x)
// Lowering extension
// Note: we always extend to 64 bits even though some ops don't need that many result bits.
return rewriteValueAMD64_OpRound32F_0(v)
case OpRound64F:
return rewriteValueAMD64_OpRound64F_0(v)
+ case OpRoundToEven:
+ return rewriteValueAMD64_OpRoundToEven_0(v)
case OpRsh16Ux16:
return rewriteValueAMD64_OpRsh16Ux16_0(v)
case OpRsh16Ux32:
return true
}
}
+func rewriteValueAMD64_OpRoundToEven_0(v *Value) bool {
+ // match: (RoundToEven x)
+ // cond:
+ // result: (ROUNDSD [0] x)
+ for {
+ x := v.Args[0]
+ v.reset(OpAMD64ROUNDSD)
+ v.AuxInt = 0
+ v.AddArg(x)
+ return true
+ }
+}
func rewriteValueAMD64_OpRsh16Ux16_0(v *Value) bool {
b := v.Block
_ = b