]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: intrinsify Sub64 on riscv64
authorWayne Zuo <wdvxdr@golangcn.org>
Fri, 29 Jul 2022 14:14:53 +0000 (22:14 +0800)
committerJoel Sing <joel@sing.id.au>
Sat, 27 Aug 2022 05:43:59 +0000 (05:43 +0000)
After this CL, the performance difference in crypto/elliptic
benchmarks on linux/riscv64 are:

name                 old time/op    new time/op    delta
ScalarBaseMult/P256    1.64ms ± 1%    1.60ms ± 1%   -2.36%  (p=0.008 n=5+5)
ScalarBaseMult/P224    1.53ms ± 1%    1.47ms ± 2%   -4.24%  (p=0.008 n=5+5)
ScalarBaseMult/P384    5.12ms ± 2%    5.03ms ± 2%     ~     (p=0.095 n=5+5)
ScalarBaseMult/P521    22.3ms ± 2%    13.8ms ± 1%  -37.89%  (p=0.008 n=5+5)
ScalarMult/P256        4.49ms ± 2%    4.26ms ± 2%   -5.13%  (p=0.008 n=5+5)
ScalarMult/P224        4.33ms ± 1%    4.09ms ± 1%   -5.59%  (p=0.008 n=5+5)
ScalarMult/P384        16.3ms ± 1%    15.5ms ± 2%   -4.78%  (p=0.008 n=5+5)
ScalarMult/P521         101ms ± 0%      47ms ± 2%  -53.36%  (p=0.008 n=5+5)

Change-Id: I31cf0506e27f9d85f576af1813630a19c20dda8a
Reviewed-on: https://go-review.googlesource.com/c/go/+/420095
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Joel Sing <joel@sing.id.au>
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Wayne Zuo <wdvxdr@golangcn.org>
TryBot-Result: Gopher Robot <gobot@golang.org>

src/cmd/compile/internal/ssa/gen/RISCV64.rules
src/cmd/compile/internal/ssa/rewriteRISCV64.go
src/cmd/compile/internal/ssagen/ssa.go
test/codegen/mathbits.go

index 9d2d785d0eab56c6a8d61224911c5ce9fe5ba8e5..9ba0d9c1af358c5b0f4fab5cf9e5980189045b70 100644 (file)
 (Select1 (Add64carry x y c)) =>
        (OR (SLTU <typ.UInt64> s:(ADD <typ.UInt64> x y) x) (SLTU <typ.UInt64> (ADD <typ.UInt64> s c) s))
 
+(Select0 (Sub64borrow x y c)) => (SUB (SUB <typ.UInt64> x y) c)
+(Select1 (Sub64borrow x y c)) =>
+       (OR (SLTU <typ.UInt64> x s:(SUB <typ.UInt64> x y)) (SLTU <typ.UInt64> s (SUB <typ.UInt64> s c)))
+
 // (x + y) / 2 => (x / 2) + (y / 2) + (x & y & 1)
 (Avg64u <t> x y) => (ADD (ADD <t> (SRLI <t> [1] x) (SRLI <t> [1] y)) (ANDI <t> [1] (AND <t> x y)))
 
index e4e4003f34e2416eb37491aedd8d1410dfd7148b..8e7bdc0a2a9fb9d871a17277cfecdee984431579 100644 (file)
@@ -6089,6 +6089,21 @@ func rewriteValueRISCV64_OpSelect0(v *Value) bool {
                v.AddArg2(v0, c)
                return true
        }
+       // match: (Select0 (Sub64borrow x y c))
+       // result: (SUB (SUB <typ.UInt64> x y) c)
+       for {
+               if v_0.Op != OpSub64borrow {
+                       break
+               }
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpRISCV64SUB)
+               v0 := b.NewValue0(v.Pos, OpRISCV64SUB, typ.UInt64)
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, c)
+               return true
+       }
        // match: (Select0 m:(LoweredMuluhilo x y))
        // cond: m.Uses == 1
        // result: (MULHU x y)
@@ -6133,6 +6148,27 @@ func rewriteValueRISCV64_OpSelect1(v *Value) bool {
                v.AddArg2(v0, v2)
                return true
        }
+       // match: (Select1 (Sub64borrow x y c))
+       // result: (OR (SLTU <typ.UInt64> x s:(SUB <typ.UInt64> x y)) (SLTU <typ.UInt64> s (SUB <typ.UInt64> s c)))
+       for {
+               if v_0.Op != OpSub64borrow {
+                       break
+               }
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpRISCV64OR)
+               v0 := b.NewValue0(v.Pos, OpRISCV64SLTU, typ.UInt64)
+               s := b.NewValue0(v.Pos, OpRISCV64SUB, typ.UInt64)
+               s.AddArg2(x, y)
+               v0.AddArg2(x, s)
+               v2 := b.NewValue0(v.Pos, OpRISCV64SLTU, typ.UInt64)
+               v3 := b.NewValue0(v.Pos, OpRISCV64SUB, typ.UInt64)
+               v3.AddArg2(s, c)
+               v2.AddArg2(s, v3)
+               v.AddArg2(v0, v2)
+               return true
+       }
        // match: (Select1 m:(LoweredMuluhilo x y))
        // cond: m.Uses == 1
        // result: (MUL x y)
index 107944170fcaaa6137e046483e10c6599d92788f..26e14e2d21ce325f22ae75dffa9815fef0c377d4 100644 (file)
@@ -4732,8 +4732,8 @@ func InitTables() {
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
                },
-               sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X)
-       alias("math/bits", "Sub", "math/bits", "Sub64", sys.ArchAMD64, sys.ArchARM64, sys.ArchS390X)
+               sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64)
+       alias("math/bits", "Sub", "math/bits", "Sub64", sys.ArchAMD64, sys.ArchARM64, sys.ArchS390X, sys.ArchRISCV64)
        addF("math/bits", "Div64",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        // check for divide-by-zero/overflow and panic with appropriate message
index f36916ad03a37e6ab895c204ea0b3c65e999fe43..9c643647ee900bd644673fb718d60c314851d712 100644 (file)
@@ -621,6 +621,7 @@ func Sub(x, y, ci uint) (r, co uint) {
        // ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
        // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
        // s390x:"SUBE"
+       // riscv64: "SUB","SLTU"
        return bits.Sub(x, y, ci)
 }
 
@@ -630,6 +631,7 @@ func SubC(x, ci uint) (r, co uint) {
        // ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
        // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
        // s390x:"SUBE"
+       // riscv64: "SUB","SLTU"
        return bits.Sub(x, 7, ci)
 }
 
@@ -639,6 +641,7 @@ func SubZ(x, y uint) (r, co uint) {
        // ppc64:"SUBC", -"SUBE", "SUBZE", "NEG"
        // ppc64le:"SUBC", -"SUBE", "SUBZE", "NEG"
        // s390x:"SUBC"
+       // riscv64: "SUB","SLTU"
        return bits.Sub(x, y, 0)
 }
 
@@ -648,6 +651,7 @@ func SubR(x, y, ci uint) uint {
        // ppc64:"SUBC", "SUBE", -"SUBZE", -"NEG"
        // ppc64le:"SUBC", "SUBE", -"SUBZE", -"NEG"
        // s390x:"SUBE"
+       // riscv64: "SUB",-"SLTU"
        r, _ := bits.Sub(x, y, ci)
        return r
 }
@@ -669,6 +673,7 @@ func Sub64(x, y, ci uint64) (r, co uint64) {
        // ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
        // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
        // s390x:"SUBE"
+       // riscv64: "SUB","SLTU"
        return bits.Sub64(x, y, ci)
 }
 
@@ -678,6 +683,7 @@ func Sub64C(x, ci uint64) (r, co uint64) {
        // ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
        // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
        // s390x:"SUBE"
+       // riscv64: "SUB","SLTU"
        return bits.Sub64(x, 7, ci)
 }
 
@@ -687,6 +693,7 @@ func Sub64Z(x, y uint64) (r, co uint64) {
        // ppc64:"SUBC", -"SUBE", "SUBZE", "NEG"
        // ppc64le:"SUBC", -"SUBE", "SUBZE", "NEG"
        // s390x:"SUBC"
+       // riscv64: "SUB","SLTU"
        return bits.Sub64(x, y, 0)
 }
 
@@ -696,6 +703,7 @@ func Sub64R(x, y, ci uint64) uint64 {
        // ppc64:"SUBC", "SUBE", -"SUBZE", -"NEG"
        // ppc64le:"SUBC", "SUBE", -"SUBZE", -"NEG"
        // s390x:"SUBE"
+       // riscv64: "SUB",-"SLTU"
        r, _ := bits.Sub64(x, y, ci)
        return r
 }