From: Wayne Zuo Date: Fri, 29 Jul 2022 14:14:53 +0000 (+0800) Subject: cmd/compile: intrinsify Sub64 on riscv64 X-Git-Tag: go1.20rc1~1373 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=a6219737e3eb062282e6483a915c395affb30c69;p=gostls13.git cmd/compile: intrinsify Sub64 on riscv64 After this CL, the performance difference in crypto/elliptic benchmarks on linux/riscv64 are: name old time/op new time/op delta ScalarBaseMult/P256 1.64ms ± 1% 1.60ms ± 1% -2.36% (p=0.008 n=5+5) ScalarBaseMult/P224 1.53ms ± 1% 1.47ms ± 2% -4.24% (p=0.008 n=5+5) ScalarBaseMult/P384 5.12ms ± 2% 5.03ms ± 2% ~ (p=0.095 n=5+5) ScalarBaseMult/P521 22.3ms ± 2% 13.8ms ± 1% -37.89% (p=0.008 n=5+5) ScalarMult/P256 4.49ms ± 2% 4.26ms ± 2% -5.13% (p=0.008 n=5+5) ScalarMult/P224 4.33ms ± 1% 4.09ms ± 1% -5.59% (p=0.008 n=5+5) ScalarMult/P384 16.3ms ± 1% 15.5ms ± 2% -4.78% (p=0.008 n=5+5) ScalarMult/P521 101ms ± 0% 47ms ± 2% -53.36% (p=0.008 n=5+5) Change-Id: I31cf0506e27f9d85f576af1813630a19c20dda8a Reviewed-on: https://go-review.googlesource.com/c/go/+/420095 Reviewed-by: Cherry Mui Reviewed-by: Joel Sing Reviewed-by: David Chase Run-TryBot: Wayne Zuo TryBot-Result: Gopher Robot --- diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64.rules b/src/cmd/compile/internal/ssa/gen/RISCV64.rules index 9d2d785d0e..9ba0d9c1af 100644 --- a/src/cmd/compile/internal/ssa/gen/RISCV64.rules +++ b/src/cmd/compile/internal/ssa/gen/RISCV64.rules @@ -56,6 +56,10 @@ (Select1 (Add64carry x y c)) => (OR (SLTU s:(ADD x y) x) (SLTU (ADD s c) s)) +(Select0 (Sub64borrow x y c)) => (SUB (SUB x y) c) +(Select1 (Sub64borrow x y c)) => + (OR (SLTU x s:(SUB x y)) (SLTU s (SUB s c))) + // (x + y) / 2 => (x / 2) + (y / 2) + (x & y & 1) (Avg64u x y) => (ADD (ADD (SRLI [1] x) (SRLI [1] y)) (ANDI [1] (AND x y))) diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go index e4e4003f34..8e7bdc0a2a 100644 --- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go +++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go @@ -6089,6 +6089,21 @@ func rewriteValueRISCV64_OpSelect0(v *Value) bool { v.AddArg2(v0, c) return true } + // match: (Select0 (Sub64borrow x y c)) + // result: (SUB (SUB x y) c) + for { + if v_0.Op != OpSub64borrow { + break + } + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpRISCV64SUB) + v0 := b.NewValue0(v.Pos, OpRISCV64SUB, typ.UInt64) + v0.AddArg2(x, y) + v.AddArg2(v0, c) + return true + } // match: (Select0 m:(LoweredMuluhilo x y)) // cond: m.Uses == 1 // result: (MULHU x y) @@ -6133,6 +6148,27 @@ func rewriteValueRISCV64_OpSelect1(v *Value) bool { v.AddArg2(v0, v2) return true } + // match: (Select1 (Sub64borrow x y c)) + // result: (OR (SLTU x s:(SUB x y)) (SLTU s (SUB s c))) + for { + if v_0.Op != OpSub64borrow { + break + } + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpRISCV64OR) + v0 := b.NewValue0(v.Pos, OpRISCV64SLTU, typ.UInt64) + s := b.NewValue0(v.Pos, OpRISCV64SUB, typ.UInt64) + s.AddArg2(x, y) + v0.AddArg2(x, s) + v2 := b.NewValue0(v.Pos, OpRISCV64SLTU, typ.UInt64) + v3 := b.NewValue0(v.Pos, OpRISCV64SUB, typ.UInt64) + v3.AddArg2(s, c) + v2.AddArg2(s, v3) + v.AddArg2(v0, v2) + return true + } // match: (Select1 m:(LoweredMuluhilo x y)) // cond: m.Uses == 1 // result: (MUL x y) diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go index 107944170f..26e14e2d21 100644 --- a/src/cmd/compile/internal/ssagen/ssa.go +++ b/src/cmd/compile/internal/ssagen/ssa.go @@ -4732,8 +4732,8 @@ func InitTables() { func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2]) }, - sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X) - alias("math/bits", "Sub", "math/bits", "Sub64", sys.ArchAMD64, sys.ArchARM64, sys.ArchS390X) + sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64) + alias("math/bits", "Sub", "math/bits", "Sub64", sys.ArchAMD64, sys.ArchARM64, sys.ArchS390X, sys.ArchRISCV64) addF("math/bits", "Div64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { // check for divide-by-zero/overflow and panic with appropriate message diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index f36916ad03..9c643647ee 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -621,6 +621,7 @@ func Sub(x, y, ci uint) (r, co uint) { // ppc64:"SUBC", "SUBE", "SUBZE", "NEG" // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG" // s390x:"SUBE" + // riscv64: "SUB","SLTU" return bits.Sub(x, y, ci) } @@ -630,6 +631,7 @@ func SubC(x, ci uint) (r, co uint) { // ppc64:"SUBC", "SUBE", "SUBZE", "NEG" // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG" // s390x:"SUBE" + // riscv64: "SUB","SLTU" return bits.Sub(x, 7, ci) } @@ -639,6 +641,7 @@ func SubZ(x, y uint) (r, co uint) { // ppc64:"SUBC", -"SUBE", "SUBZE", "NEG" // ppc64le:"SUBC", -"SUBE", "SUBZE", "NEG" // s390x:"SUBC" + // riscv64: "SUB","SLTU" return bits.Sub(x, y, 0) } @@ -648,6 +651,7 @@ func SubR(x, y, ci uint) uint { // ppc64:"SUBC", "SUBE", -"SUBZE", -"NEG" // ppc64le:"SUBC", "SUBE", -"SUBZE", -"NEG" // s390x:"SUBE" + // riscv64: "SUB",-"SLTU" r, _ := bits.Sub(x, y, ci) return r } @@ -669,6 +673,7 @@ func Sub64(x, y, ci uint64) (r, co uint64) { // ppc64:"SUBC", "SUBE", "SUBZE", "NEG" // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG" // s390x:"SUBE" + // riscv64: "SUB","SLTU" return bits.Sub64(x, y, ci) } @@ -678,6 +683,7 @@ func Sub64C(x, ci uint64) (r, co uint64) { // ppc64:"SUBC", "SUBE", "SUBZE", "NEG" // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG" // s390x:"SUBE" + // riscv64: "SUB","SLTU" return bits.Sub64(x, 7, ci) } @@ -687,6 +693,7 @@ func Sub64Z(x, y uint64) (r, co uint64) { // ppc64:"SUBC", -"SUBE", "SUBZE", "NEG" // ppc64le:"SUBC", -"SUBE", "SUBZE", "NEG" // s390x:"SUBC" + // riscv64: "SUB","SLTU" return bits.Sub64(x, y, 0) } @@ -696,6 +703,7 @@ func Sub64R(x, y, ci uint64) uint64 { // ppc64:"SUBC", "SUBE", -"SUBZE", -"NEG" // ppc64le:"SUBC", "SUBE", -"SUBZE", -"NEG" // s390x:"SUBE" + // riscv64: "SUB",-"SLTU" r, _ := bits.Sub64(x, y, ci) return r }