Add rewrite rules to optimize math.Copysign() when the second
argument is negative floating point constant.
For example, math.Copysign(c, -2): The previous compile output is
"AND $
9223372036854775807, R0, R0; ORR $-
9223372036854775808, R0, R0".
The optimized compile output is "ORR $-
9223372036854775808, R0, R0"
Math package benchmark results.
name old time/op new time/op delta
Copysign-8 2.61ns ± 2% 2.49ns ± 0% -4.55% (p=0.000 n=10+10)
Cos-8 43.0ns ± 0% 41.5ns ± 0% -3.49% (p=0.000 n=10+10)
Cosh-8 98.6ns ± 0% 98.1ns ± 0% -0.51% (p=0.000 n=10+10)
ExpGo-8 107ns ± 0% 105ns ± 0% -1.87% (p=0.000 n=10+10)
Exp2Go-8 100ns ± 0% 100ns ± 0% +0.39% (p=0.000 n=10+8)
Max-8 6.56ns ± 2% 6.45ns ± 1% -1.63% (p=0.002 n=10+10)
Min-8 6.66ns ± 3% 6.47ns ± 2% -2.82% (p=0.006 n=10+10)
Mod-8 107ns ± 1% 104ns ± 1% -2.72% (p=0.000 n=10+10)
Frexp-8 11.5ns ± 1% 11.0ns ± 0% -4.56% (p=0.000 n=8+10)
HypotGo-8 19.4ns ± 0% 19.4ns ± 0% +0.36% (p=0.019 n=10+10)
Ilogb-8 8.63ns ± 0% 8.51ns ± 0% -1.36% (p=0.000 n=10+10)
Jn-8 584ns ± 0% 585ns ± 0% +0.17% (p=0.000 n=7+8)
Ldexp-8 13.8ns ± 0% 13.5ns ± 0% -2.17% (p=0.002 n=8+10)
Logb-8 10.2ns ± 0% 9.9ns ± 0% -2.65% (p=0.000 n=10+7)
Nextafter64-8 7.54ns ± 0% 7.51ns ± 0% -0.37% (p=0.000 n=10+10)
Remainder-8 73.5ns ± 1% 70.4ns ± 1% -4.27% (p=0.000 n=10+10)
SqrtGoLatency-8 79.6ns ± 0% 76.2ns ± 0% -4.30% (p=0.000 n=9+10)
Yn-8 582ns ± 0% 579ns ± 0% -0.52% (p=0.000 n=10+10)
Change-Id: I0c9cd1ea87435e7b8bab94b4e79e6e29785f25b1
Reviewed-on: https://go-review.googlesource.com/132915
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
// Load args directly into the register class where it will be used.
(FMOVDgpfp <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
+(FMOVDfpgp <t> (Arg [off] {sym})) -> @b.Func.Entry (Arg <t> [off] {sym})
+
// Similarly for stores, if we see a store after FPR <-> GPR move, then redirect store to use the other register set.
(MOVDstore ptr (FMOVDfpgp val) mem) -> (FMOVDstore ptr val mem)
(FMOVDstore ptr (FMOVDgpfp val) mem) -> (MOVDstore ptr val mem)
(SRLconst [c] (SLLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [1<<uint(64-c)-1] x) // mask out high bits
(SLLconst [c] (SRLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [^(1<<uint(c)-1)] x) // mask out low bits
+// Special case setting bit as 1. An example is math.Copysign(c,-1)
+(ORconst [c1] (ANDconst [c2] x)) && c2|c1 == ^0 -> (ORconst [c1] x)
+
// bitfield ops
// sbfiz
return rewriteValueARM64_OpARM64FADDD_0(v)
case OpARM64FADDS:
return rewriteValueARM64_OpARM64FADDS_0(v)
+ case OpARM64FMOVDfpgp:
+ return rewriteValueARM64_OpARM64FMOVDfpgp_0(v)
case OpARM64FMOVDgpfp:
return rewriteValueARM64_OpARM64FMOVDgpfp_0(v)
case OpARM64FMOVDload:
}
return false
}
+func rewriteValueARM64_OpARM64FMOVDfpgp_0(v *Value) bool {
+ b := v.Block
+ _ = b
+ // match: (FMOVDfpgp <t> (Arg [off] {sym}))
+ // cond:
+ // result: @b.Func.Entry (Arg <t> [off] {sym})
+ for {
+ t := v.Type
+ v_0 := v.Args[0]
+ if v_0.Op != OpArg {
+ break
+ }
+ off := v_0.AuxInt
+ sym := v_0.Aux
+ b = b.Func.Entry
+ v0 := b.NewValue0(v.Pos, OpArg, t)
+ v.reset(OpCopy)
+ v.AddArg(v0)
+ v0.AuxInt = off
+ v0.Aux = sym
+ return true
+ }
+ return false
+}
func rewriteValueARM64_OpARM64FMOVDgpfp_0(v *Value) bool {
b := v.Block
_ = b
v.AddArg(x)
return true
}
+ // match: (ORconst [c1] (ANDconst [c2] x))
+ // cond: c2|c1 == ^0
+ // result: (ORconst [c1] x)
+ for {
+ c1 := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpARM64ANDconst {
+ break
+ }
+ c2 := v_0.AuxInt
+ x := v_0.Args[0]
+ if !(c2|c1 == ^0) {
+ break
+ }
+ v.reset(OpARM64ORconst)
+ v.AuxInt = c1
+ v.AddArg(x)
+ return true
+ }
return false
}
func rewriteValueARM64_OpARM64ORshiftLL_0(v *Value) bool {
// amd64:"BTSQ\t[$]63"
// s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
// ppc64le:"FCPSGN"
+ // arm64:"ORR\t[$]-9223372036854775808"
sink64[1] = math.Copysign(c, -1)
// Like math.Copysign(c, -1), but with integer operations. Useful