Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: intrinsify Sub64 on loong64
author Wayne Zuo <wdvxdr@golangcn.org>
Tue, 6 Sep 2022 14:29:31 +0000 (22:29 +0800)
committer David Chase <drchase@google.com>
Fri, 7 Oct 2022 18:16:26 +0000 (18:16 +0000)
This is a follow-up to CL 420095, applied to loong64.

file                                    before    after     Δ       %
compile/internal/ssa.a                  35649482  35653274  +3792   +0.011%
compile/internal/ssagen.a               4099858   4098728   -1130   -0.028%
ecdh.a                                  227896    226896    -1000   -0.439%
internal/nistec/fiat.a                  1212254   1128184   -84070  -6.935%
tls.a                                   3256800   3256802   +2      +0.000%
big.a                                   1708518   1702496   -6022   -0.352%
bits.a                                  106762    105734    -1028   -0.963%
math.a                                  578762    577288    -1474   -0.255%
netip.a                                 555922    555610    -312    -0.056%
net.a                                   3286528   3286530   +2      +0.000%
golang.org/x/crypto/internal/poly1305.a 109546    107686    -1860   -1.698%
total                                   260392768 260299668 -93100  -0.036%

Change-Id: Ieffca705aae5666501f284502d986ca179dde494
Reviewed-on: https://go-review.googlesource.com/c/go/+/428557
Reviewed-by: Carlos Amedee <carlos@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Wayne Zuo <wdvxdr@golangcn.org>

src/cmd/compile/internal/ssa/_gen/LOONG64.rules
src/cmd/compile/internal/ssa/rewriteLOONG64.go
src/cmd/compile/internal/ssagen/ssa.go
test/codegen/mathbits.go

index 74bc60a302750501d9a874b7bb34734199d9823d..33cc8a6ae3a26f2f957db998cb4f46155e81d4b2 100644 (file)
 (Select1 <t> (Add64carry x y c)) =>
        (OR (SGTU <t> x s:(ADDV <t> x y)) (SGTU <t> s (ADDV <t> s c)))
 
+(Select0 <t> (Sub64borrow x y c)) => (SUBV (SUBV <t> x y) c)
+(Select1 <t> (Sub64borrow x y c)) =>
+       (OR (SGTU <t> s:(SUBV <t> x y) x) (SGTU <t> (SUBV <t> s c) s))
+
 // (x + y) / 2 with x>=y => (x - y) / 2 + y
 (Avg64u <t> x y) => (ADDV (SRLVconst <t> (SUBV <t> x y) [1]) y)
 
index e2d0d20efb3268d8f4750f6b60caad923b517c9c..fbe49e58194fbeea70bbdca3ea34d8f2998c17f4 100644 (file)
@@ -6844,6 +6844,22 @@ func rewriteValueLOONG64_OpSelect0(v *Value) bool {
                v.AddArg2(v0, c)
                return true
        }
+       // match: (Select0 <t> (Sub64borrow x y c))
+       // result: (SUBV (SUBV <t> x y) c)
+       for {
+               t := v.Type
+               if v_0.Op != OpSub64borrow {
+                       break
+               }
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpLOONG64SUBV)
+               v0 := b.NewValue0(v.Pos, OpLOONG64SUBV, t)
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, c)
+               return true
+       }
        // match: (Select0 (DIVVU _ (MOVVconst [1])))
        // result: (MOVVconst [0])
        for {
@@ -6978,6 +6994,28 @@ func rewriteValueLOONG64_OpSelect1(v *Value) bool {
                v.AddArg2(v0, v2)
                return true
        }
+       // match: (Select1 <t> (Sub64borrow x y c))
+       // result: (OR (SGTU <t> s:(SUBV <t> x y) x) (SGTU <t> (SUBV <t> s c) s))
+       for {
+               t := v.Type
+               if v_0.Op != OpSub64borrow {
+                       break
+               }
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpLOONG64OR)
+               v0 := b.NewValue0(v.Pos, OpLOONG64SGTU, t)
+               s := b.NewValue0(v.Pos, OpLOONG64SUBV, t)
+               s.AddArg2(x, y)
+               v0.AddArg2(s, x)
+               v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, t)
+               v3 := b.NewValue0(v.Pos, OpLOONG64SUBV, t)
+               v3.AddArg2(s, c)
+               v2.AddArg2(v3, s)
+               v.AddArg2(v0, v2)
+               return true
+       }
        // match: (Select1 (MULVU x (MOVVconst [-1])))
        // result: (NEGV x)
        for {
index f4601c56cf7fb1d125a5c2040e1e973097e26a98..77307f4c2dfddfffd9e53346a9738da7c92f4667 100644 (file)
@@ -4736,8 +4736,8 @@ func InitTables() {
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
                },
-               sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64)
-       alias("math/bits", "Sub", "math/bits", "Sub64", sys.ArchAMD64, sys.ArchARM64, sys.ArchS390X, sys.ArchRISCV64)
+               sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64)
+       alias("math/bits", "Sub", "math/bits", "Sub64", p8...)
        addF("math/bits", "Div64",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        // check for divide-by-zero/overflow and panic with appropriate message
index bd23b90b9b4057971721759d0d37680f11343913..acc9930c61d4573ef71447fb99f8ee552914e6c1 100644 (file)
@@ -627,6 +627,7 @@ func Add64MPanicOnOverflowGT(a, b [2]uint64) [2]uint64 {
 func Sub(x, y, ci uint) (r, co uint) {
        // amd64:"NEGL","SBBQ","NEGQ"
        // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+       // loong64:"SUBV","SGTU"
        // ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
        // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
        // s390x:"SUBE"
@@ -637,6 +638,7 @@ func Sub(x, y, ci uint) (r, co uint) {
 func SubC(x, ci uint) (r, co uint) {
        // amd64:"NEGL","SBBQ","NEGQ"
        // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+       // loong64:"SUBV","SGTU"
        // ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
        // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
        // s390x:"SUBE"
@@ -647,6 +649,7 @@ func SubC(x, ci uint) (r, co uint) {
 func SubZ(x, y uint) (r, co uint) {
        // amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
        // arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
+       // loong64:"SUBV","SGTU"
        // ppc64:"SUBC", -"SUBE", "SUBZE", "NEG"
        // ppc64le:"SUBC", -"SUBE", "SUBZE", "NEG"
        // s390x:"SUBC"
@@ -657,6 +660,7 @@ func SubZ(x, y uint) (r, co uint) {
 func SubR(x, y, ci uint) uint {
        // amd64:"NEGL","SBBQ",-"NEGQ"
        // arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
+       // loong64:"SUBV",-"SGTU"
        // ppc64:"SUBC", "SUBE", -"SUBZE", -"NEG"
        // ppc64le:"SUBC", "SUBE", -"SUBZE", -"NEG"
        // s390x:"SUBE"
@@ -679,6 +683,7 @@ func SubM(p, q, r *[3]uint) {
 func Sub64(x, y, ci uint64) (r, co uint64) {
        // amd64:"NEGL","SBBQ","NEGQ"
        // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+       // loong64:"SUBV","SGTU"
        // ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
        // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
        // s390x:"SUBE"
@@ -689,6 +694,7 @@ func Sub64(x, y, ci uint64) (r, co uint64) {
 func Sub64C(x, ci uint64) (r, co uint64) {
        // amd64:"NEGL","SBBQ","NEGQ"
        // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
+       // loong64:"SUBV","SGTU"
        // ppc64:"SUBC", "SUBE", "SUBZE", "NEG"
        // ppc64le:"SUBC", "SUBE", "SUBZE", "NEG"
        // s390x:"SUBE"
@@ -699,6 +705,7 @@ func Sub64C(x, ci uint64) (r, co uint64) {
 func Sub64Z(x, y uint64) (r, co uint64) {
        // amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
        // arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
+       // loong64:"SUBV","SGTU"
        // ppc64:"SUBC", -"SUBE", "SUBZE", "NEG"
        // ppc64le:"SUBC", -"SUBE", "SUBZE", "NEG"
        // s390x:"SUBC"
@@ -709,6 +716,7 @@ func Sub64Z(x, y uint64) (r, co uint64) {
 func Sub64R(x, y, ci uint64) uint64 {
        // amd64:"NEGL","SBBQ",-"NEGQ"
        // arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
+       // loong64:"SUBV",-"SGTU"
        // ppc64:"SUBC", "SUBE", -"SUBZE", -"NEG"
        // ppc64le:"SUBC", "SUBE", -"SUBZE", -"NEG"
        // s390x:"SUBE"