]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: intrinsify Add64 on loong64
authorWayne Zuo <wdvxdr@golangcn.org>
Tue, 6 Sep 2022 14:12:16 +0000 (22:12 +0800)
committerDavid Chase <drchase@google.com>
Fri, 7 Oct 2022 18:16:10 +0000 (18:16 +0000)
This is a follow up of CL 420094  on loong64.

Reduce go toolchain size slightly on linux/loong64.

compilecmp HEAD~1 -> HEAD
HEAD~1 (8a32354219): internal/trace: use strings.Builder
HEAD (1767784ac3): cmd/compile: intrinsify Add64 on loong64
platform: linux/loong64

file      before    after     Δ       %
addr2line 3882616   3882536   -80     -0.002%
api       5528866   5528450   -416    -0.008%
asm       5133780   5133796   +16     +0.000%
cgo       4668787   4668491   -296    -0.006%
compile   25163409  25164729  +1320   +0.005%
cover     4658055   4658007   -48     -0.001%
dist      3437783   3437727   -56     -0.002%
doc       3883069   3883205   +136    +0.004%
fix       3383254   3383070   -184    -0.005%
link      6747559   6747023   -536    -0.008%
nm        3793923   3793939   +16     +0.000%
objdump   4256628   4256812   +184    +0.004%
pack      2356328   2356144   -184    -0.008%
pprof     14233370  14131910  -101460 -0.713%
test2json 2638668   2638476   -192    -0.007%
trace     13392065  13360781  -31284  -0.234%
vet       7456388   7455588   -800    -0.011%
total     132498256 132364392 -133864 -0.101%

file                                    before    after     Δ       %
compile/internal/ssa.a                  35644590  35649482  +4892   +0.014%
compile/internal/ssagen.a               4101250   4099858   -1392   -0.034%
internal/edwards25519/field.a           226064    201718    -24346  -10.770%
internal/nistec/fiat.a                  1689922   1212254   -477668 -28.266%
tls.a                                   3256798   3256800   +2      +0.000%
big.a                                   1718552   1708518   -10034  -0.584%
bits.a                                  107786    106762    -1024   -0.950%
cmplx.a                                 169434    168214    -1220   -0.720%
math.a                                  581302    578762    -2540   -0.437%
netip.a                                 556096    555922    -174    -0.031%
net.a                                   3286526   3286528   +2      +0.000%
runtime.a                               8644786   8644510   -276    -0.003%
strconv.a                               519098    518374    -724    -0.139%
golang.org/x/crypto/internal/poly1305.a 115398    109546    -5852   -5.071%
total                                   260913122 260392768 -520354 -0.199%

Change-Id: I75b2bb7761fa5a0d0d032d4ebe3582d092ea77be
Reviewed-on: https://go-review.googlesource.com/c/go/+/428556
Reviewed-by: Carlos Amedee <carlos@golang.org>
Run-TryBot: Wayne Zuo <wdvxdr@golangcn.org>
Reviewed-by: David Chase <drchase@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>

src/cmd/compile/internal/ssa/_gen/LOONG64.rules
src/cmd/compile/internal/ssa/rewriteLOONG64.go
src/cmd/compile/internal/ssagen/ssa.go
test/codegen/mathbits.go

index 431d95f0969d291922682da69e92c53fde7b54da..74bc60a302750501d9a874b7bb34734199d9823d 100644 (file)
 (Mod8 x y) => (Select0 (DIVV (SignExt8to64 x) (SignExt8to64 y)))
 (Mod8u x y) => (Select0 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y)))
 
+(Select0 <t> (Add64carry x y c)) => (ADDV (ADDV <t> x y) c)
+(Select1 <t> (Add64carry x y c)) =>
+       (OR (SGTU <t> x s:(ADDV <t> x y)) (SGTU <t> s (ADDV <t> s c)))
+
 // (x + y) / 2 with x>=y => (x - y) / 2 + y
 (Avg64u <t> x y) => (ADDV (SRLVconst <t> (SUBV <t> x y) [1]) y)
 
 (GTZ (MOVVconst [c]) yes no) && c <= 0 => (First no yes)
 (GEZ (MOVVconst [c]) yes no) && c >= 0 => (First yes no)
 (GEZ (MOVVconst [c]) yes no) && c <  0 => (First no yes)
+
+// SGT/SGTU with known outcomes.
+(SGT  x x) => (MOVVconst [0])
+(SGTU x x) => (MOVVconst [0])
index d2e7e8e061ffc6b6b7badb95667c58f3a4e5024f..e2d0d20efb3268d8f4750f6b60caad923b517c9c 100644 (file)
@@ -3367,6 +3367,17 @@ func rewriteValueLOONG64_OpLOONG64SGT(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (SGT x x)
+       // result: (MOVVconst [0])
+       for {
+               x := v_0
+               if x != v_1 {
+                       break
+               }
+               v.reset(OpLOONG64MOVVconst)
+               v.AuxInt = int64ToAuxInt(0)
+               return true
+       }
        return false
 }
 func rewriteValueLOONG64_OpLOONG64SGTU(v *Value) bool {
@@ -3389,6 +3400,17 @@ func rewriteValueLOONG64_OpLOONG64SGTU(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (SGTU x x)
+       // result: (MOVVconst [0])
+       for {
+               x := v_0
+               if x != v_1 {
+                       break
+               }
+               v.reset(OpLOONG64MOVVconst)
+               v.AuxInt = int64ToAuxInt(0)
+               return true
+       }
        return false
 }
 func rewriteValueLOONG64_OpLOONG64SGTUconst(v *Value) bool {
@@ -6806,6 +6828,22 @@ func rewriteValueLOONG64_OpSelect0(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (Select0 <t> (Add64carry x y c))
+       // result: (ADDV (ADDV <t> x y) c)
+       for {
+               t := v.Type
+               if v_0.Op != OpAdd64carry {
+                       break
+               }
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpLOONG64ADDV)
+               v0 := b.NewValue0(v.Pos, OpLOONG64ADDV, t)
+               v0.AddArg2(x, y)
+               v.AddArg2(v0, c)
+               return true
+       }
        // match: (Select0 (DIVVU _ (MOVVconst [1])))
        // result: (MOVVconst [0])
        for {
@@ -6918,6 +6956,28 @@ func rewriteValueLOONG64_OpSelect1(v *Value) bool {
                v.AddArg2(v0, v2)
                return true
        }
+       // match: (Select1 <t> (Add64carry x y c))
+       // result: (OR (SGTU <t> x s:(ADDV <t> x y)) (SGTU <t> s (ADDV <t> s c)))
+       for {
+               t := v.Type
+               if v_0.Op != OpAdd64carry {
+                       break
+               }
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpLOONG64OR)
+               v0 := b.NewValue0(v.Pos, OpLOONG64SGTU, t)
+               s := b.NewValue0(v.Pos, OpLOONG64ADDV, t)
+               s.AddArg2(x, y)
+               v0.AddArg2(x, s)
+               v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, t)
+               v3 := b.NewValue0(v.Pos, OpLOONG64ADDV, t)
+               v3.AddArg2(s, c)
+               v2.AddArg2(s, v3)
+               v.AddArg2(v0, v2)
+               return true
+       }
        // match: (Select1 (MULVU x (MOVVconst [-1])))
        // result: (NEGV x)
        for {
index bafa385579096f59341867d27caaea73fe2e0d5d..f4601c56cf7fb1d125a5c2040e1e973097e26a98 100644 (file)
@@ -1692,12 +1692,12 @@ func (s *state) stmt(n ir.Node) {
                        // Currently doesn't really work because (*p)[:len(*p)] appears here as:
                        //    tmp = len(*p)
                        //    (*p)[:tmp]
-                       //if j != nil && (j.Op == OLEN && SameSafeExpr(j.Left, n.Left)) {
+                       // if j != nil && (j.Op == OLEN && SameSafeExpr(j.Left, n.Left)) {
                        //      j = nil
-                       //}
-                       //if k != nil && (k.Op == OCAP && SameSafeExpr(k.Left, n.Left)) {
+                       // }
+                       // if k != nil && (k.Op == OCAP && SameSafeExpr(k.Left, n.Left)) {
                        //      k = nil
-                       //}
+                       // }
                        if i == nil {
                                skip |= skipPtr
                                if j == nil {
@@ -4730,8 +4730,8 @@ func InitTables() {
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
                },
-               sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64)
-       alias("math/bits", "Add", "math/bits", "Add64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchS390X, sys.ArchRISCV64)
+               sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64)
+       alias("math/bits", "Add", "math/bits", "Add64", p8...)
        addF("math/bits", "Sub64",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
index 0620766f5aea503e8bdf5fb2528da9f25638ce2e..bd23b90b9b4057971721759d0d37680f11343913 100644 (file)
@@ -451,6 +451,7 @@ func Add(x, y, ci uint) (r, co uint) {
 func AddC(x, ci uint) (r, co uint) {
        // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
        // amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+       // loong64: "ADDV", "SGTU"
        // ppc64: "ADDC", "ADDE", "ADDZE"
        // ppc64le: "ADDC", "ADDE", "ADDZE"
        // s390x:"ADDE","ADDC\t[$]-1,"
@@ -461,6 +462,7 @@ func AddC(x, ci uint) (r, co uint) {
 func AddZ(x, y uint) (r, co uint) {
        // arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
        // amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
+       // loong64: "ADDV", "SGTU"
        // ppc64: "ADDC", -"ADDE", "ADDZE"
        // ppc64le: "ADDC", -"ADDE", "ADDZE"
        // s390x:"ADDC",-"ADDC\t[$]-1,"
@@ -471,6 +473,7 @@ func AddZ(x, y uint) (r, co uint) {
 func AddR(x, y, ci uint) uint {
        // arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
        // amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
+       // loong64: "ADDV", -"SGTU"
        // ppc64: "ADDC", "ADDE", -"ADDZE"
        // ppc64le: "ADDC", "ADDE", -"ADDZE"
        // s390x:"ADDE","ADDC\t[$]-1,"
@@ -492,6 +495,7 @@ func AddM(p, q, r *[3]uint) {
 func Add64(x, y, ci uint64) (r, co uint64) {
        // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
        // amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+       // loong64: "ADDV", "SGTU"
        // ppc64: "ADDC", "ADDE", "ADDZE"
        // ppc64le: "ADDC", "ADDE", "ADDZE"
        // s390x:"ADDE","ADDC\t[$]-1,"
@@ -502,6 +506,7 @@ func Add64(x, y, ci uint64) (r, co uint64) {
 func Add64C(x, ci uint64) (r, co uint64) {
        // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
        // amd64:"NEGL","ADCQ","SBBQ","NEGQ"
+       // loong64: "ADDV", "SGTU"
        // ppc64: "ADDC", "ADDE", "ADDZE"
        // ppc64le: "ADDC", "ADDE", "ADDZE"
        // s390x:"ADDE","ADDC\t[$]-1,"
@@ -512,6 +517,7 @@ func Add64C(x, ci uint64) (r, co uint64) {
 func Add64Z(x, y uint64) (r, co uint64) {
        // arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP"
        // amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
+       // loong64: "ADDV", "SGTU"
        // ppc64: "ADDC", -"ADDE", "ADDZE"
        // ppc64le: "ADDC", -"ADDE", "ADDZE"
        // s390x:"ADDC",-"ADDC\t[$]-1,"
@@ -522,6 +528,7 @@ func Add64Z(x, y uint64) (r, co uint64) {
 func Add64R(x, y, ci uint64) uint64 {
        // arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
        // amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
+       // loong64: "ADDV", -"SGTU"
        // ppc64: "ADDC", "ADDE", -"ADDZE"
        // ppc64le: "ADDC", "ADDE", -"ADDZE"
        // s390x:"ADDE","ADDC\t[$]-1,"