This change also adds corresponding benchmark tests and codegen tests.
The performance improvement on CPU Loongson-3A6000-HV is as follows:
goos: linux
goarch: loong64
pkg: cmd/compile/internal/test
cpu: Loongson-3A6000-HV @ 2500.00MHz
| bench.old | bench.new |
| sec/op | sec/op vs base |
MulNeg 828.4n ± 0% 655.9n ± 0% -20.82% (p=0.000 n=10)
Mul2Neg 1062.0n ± 0% 826.8n ± 0% -22.15% (p=0.000 n=10)
geomean 938.0n 736.4n -21.49%
Change-Id: Ia999732880ec65be0c66cddc757a4868847e5b15
Reviewed-on: https://go-review.googlesource.com/c/go/+/682535
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Mark Freeman <markfreeman@google.com>
(MULV x (MOVVconst [c])) && canMulStrengthReduce(config, c) => {mulStrengthReduce(v, x, c)}
+(MULV (NEGV x) (MOVVconst [c])) => (MULV x (MOVVconst [-c]))
+(MULV (NEGV x) (NEGV y)) => (MULV x y)
+
// div by constant
(DIVVU x (MOVVconst [1])) => x
(DIVVU x (MOVVconst [c])) && isPowerOfTwo(c) => (SRLVconst [log64(c)] x)
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
+ typ := &b.Func.Config.Types
// match: (MULV _ (MOVVconst [0]))
// result: (MOVVconst [0])
for {
}
break
}
+ // match: (MULV (NEGV x) (MOVVconst [c]))
+ // result: (MULV x (MOVVconst [-c]))
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ if v_0.Op != OpLOONG64NEGV {
+ continue
+ }
+ x := v_0.Args[0]
+ if v_1.Op != OpLOONG64MOVVconst {
+ continue
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ v.reset(OpLOONG64MULV)
+ v0 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
+ v0.AuxInt = int64ToAuxInt(-c)
+ v.AddArg2(x, v0)
+ return true
+ }
+ break
+ }
+ // match: (MULV (NEGV x) (NEGV y))
+ // result: (MULV x y)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ if v_0.Op != OpLOONG64NEGV {
+ continue
+ }
+ x := v_0.Args[0]
+ if v_1.Op != OpLOONG64NEGV {
+ continue
+ }
+ y := v_1.Args[0]
+ v.reset(OpLOONG64MULV)
+ v.AddArg2(x, y)
+ return true
+ }
+ break
+ }
// match: (MULV (MOVVconst [c]) (MOVVconst [d]))
// result: (MOVVconst [c*d])
for {
}
}
}
+
+// BenchmarkMulNeg times the expression (-x[i]) * 11, i.e. a negated int64
+// slice element multiplied by a constant, over a 1024-element slice.
+func BenchmarkMulNeg(b *testing.B) {
+ x := make([]int64, 1024)
+ for i := 0; i < b.N; i++ {
+ var s int64
+ // The inner i shadows the b.N counter above; here it indexes x.
+ for i := range x {
+ // NOTE(review): s is overwritten rather than accumulated, so only the
+ // product from the last element is observable after the loop.
+ s = (-x[i]) * 11
+ }
+ // globl is not declared in this hunk; presumably a package-level sink
+ // that keeps the result live.
+ globl = s
+ }
+}
+
+// BenchmarkMul2Neg times the expression (-x[i]) * (-y[i]), i.e. the product
+// of two negated int64 slice elements, over two 1024-element slices.
+func BenchmarkMul2Neg(b *testing.B) {
+ x := make([]int64, 1024)
+ y := make([]int64, 1024)
+ for i := 0; i < b.N; i++ {
+ var s int64
+ // The inner i shadows the b.N counter above; here it indexes x and y.
+ for i := range x {
+ // NOTE(review): s is overwritten rather than accumulated, so only the
+ // product from the last element pair is observable after the loop.
+ s = (-x[i]) * (-y[i])
+ }
+ // globl is not declared in this hunk; presumably a package-level sink
+ // that keeps the result live.
+ globl = s
+ }
+}
return a*n - 19*n // (a-19)n
}
+// Multiplications with folded negation
+
+// FoldNegMul multiplies a negated argument by the constant 11. The loong64
+// check below expects a MULV fed by a MOVV of $-11 (the negation folded into
+// the constant) and no SUBVU whose second operand is R0, presumably the
+// negate sequence. The register pattern uses R[0-9]+ so that two-digit
+// register names (R10 and up) are also caught by the negative check.
+func FoldNegMul(a int) int {
+ // loong64:"MULV","MOVV\t[$]-11",-"SUBVU\tR[0-9]+, R0,"
+ return (-a) * 11
+}
+
+// Fold2NegMul multiplies two negated arguments. The loong64 check below
+// expects a MULV and no SUBVU whose second operand is R0, presumably the
+// negate sequence, since the two negations cancel. The register pattern uses
+// R[0-9]+ so that two-digit register names (R10 and up) are also caught by
+// the negative check.
+func Fold2NegMul(a, b int) int {
+ // loong64:"MULV",-"SUBVU\tR[0-9]+, R0,"
+ return (-a) * (-b)
+}
+
// -------------- //
// Division //
// -------------- //