From: Xiaolin Zhao Date: Tue, 17 Jun 2025 12:47:10 +0000 (+0800) Subject: cmd/compile: fold negation into multiplication on loong64 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=a5527374181c936938676608d469baa41e9abe8b;p=gostls13.git cmd/compile: fold negation into multiplication on loong64 This change also adds corresponding benchmark tests and codegen tests. The performance improvement on CPU Loongson-3A6000-HV is as follows: goos: linux goarch: loong64 pkg: cmd/compile/internal/test cpu: Loongson-3A6000-HV @ 2500.00MHz | bench.old | bench.new | | sec/op | sec/op vs base | MulNeg 828.4n ± 0% 655.9n ± 0% -20.82% (p=0.000 n=10) Mul2Neg 1062.0n ± 0% 826.8n ± 0% -22.15% (p=0.000 n=10) geomean 938.0n 736.4n -21.49% Change-Id: Ia999732880ec65be0c66cddc757a4868847e5b15 Reviewed-on: https://go-review.googlesource.com/c/go/+/682535 Reviewed-by: Dmitri Shuralyov Reviewed-by: abner chenc LUCI-TryBot-Result: Go LUCI Reviewed-by: Mark Freeman --- diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules index 9d0ad0148f..9ec4e4572e 100644 --- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules @@ -755,6 +755,9 @@ (MULV x (MOVVconst [c])) && canMulStrengthReduce(config, c) => {mulStrengthReduce(v, x, c)} +(MULV (NEGV x) (MOVVconst [c])) => (MULV x (MOVVconst [-c])) +(MULV (NEGV x) (NEGV y)) => (MULV x y) + // div by constant (DIVVU x (MOVVconst [1])) => x (DIVVU x (MOVVconst [c])) && isPowerOfTwo(c) => (SRLVconst [log64(c)] x) diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go index 83242413f0..a74dd122e2 100644 --- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go +++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go @@ -5539,6 +5539,7 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool { v_0 := v.Args[0] b := v.Block config := b.Func.Config + typ := &b.Func.Config.Types // match: (MULV _ 
(MOVVconst [0])) // result: (MOVVconst [0]) for { @@ -5583,6 +5584,44 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool { } break } + // match: (MULV (NEGV x) (MOVVconst [c])) + // result: (MULV x (MOVVconst [-c])) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpLOONG64NEGV { + continue + } + x := v_0.Args[0] + if v_1.Op != OpLOONG64MOVVconst { + continue + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpLOONG64MULV) + v0 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64) + v0.AuxInt = int64ToAuxInt(-c) + v.AddArg2(x, v0) + return true + } + break + } + // match: (MULV (NEGV x) (NEGV y)) + // result: (MULV x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpLOONG64NEGV { + continue + } + x := v_0.Args[0] + if v_1.Op != OpLOONG64NEGV { + continue + } + y := v_1.Args[0] + v.reset(OpLOONG64MULV) + v.AddArg2(x, y) + return true + } + break + } // match: (MULV (MOVVconst [c]) (MOVVconst [d])) // result: (MOVVconst [c*d]) for { diff --git a/src/cmd/compile/internal/test/bench_test.go b/src/cmd/compile/internal/test/bench_test.go index 4724600091..7303f672fe 100644 --- a/src/cmd/compile/internal/test/bench_test.go +++ b/src/cmd/compile/internal/test/bench_test.go @@ -122,3 +122,26 @@ func BenchmarkBitToggleConst(b *testing.B) { } } } + +func BenchmarkMulNeg(b *testing.B) { + x := make([]int64, 1024) + for i := 0; i < b.N; i++ { + var s int64 + for i := range x { + s = (-x[i]) * 11 + } + globl = s + } +} + +func BenchmarkMul2Neg(b *testing.B) { + x := make([]int64, 1024) + y := make([]int64, 1024) + for i := 0; i < b.N; i++ { + var s int64 + for i := range x { + s = (-x[i]) * (-y[i]) + } + globl = s + } +} diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go index 9f400065bd..39a7986c7b 100644 --- a/test/codegen/arithmetic.go +++ b/test/codegen/arithmetic.go @@ -314,6 +314,18 @@ func MergeMuls5(a, n int) int { return a*n - 19*n // (a-19)n } +// Multiplications folded 
negation + +func FoldNegMul(a int) int { + // loong64:"MULV","MOVV\t[$]-11",-"SUBVU\tR[0-9], R0," + return (-a) * 11 +} + +func Fold2NegMul(a, b int) int { + // loong64:"MULV",-"SUBVU\tR[0-9], R0," + return (-a) * (-b) +} + // -------------- // // Division // // -------------- //