From: Wayne Zuo Date: Tue, 6 Sep 2022 14:12:16 +0000 (+0800) Subject: cmd/compile: intrinsify Add64 on loong64 X-Git-Tag: go1.20rc1~706 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=97760ed651f88341bcf15aa4980863c199b6f3dc;p=gostls13.git cmd/compile: intrinsify Add64 on loong64 This is a follow up of CL 420094 on loong64. Reduce go toolchain size slightly on linux/loong64. compilecmp HEAD~1 -> HEAD HEAD~1 (8a32354219): internal/trace: use strings.Builder HEAD (1767784ac3): cmd/compile: intrinsify Add64 on loong64 platform: linux/loong64 file before after Δ % addr2line 3882616 3882536 -80 -0.002% api 5528866 5528450 -416 -0.008% asm 5133780 5133796 +16 +0.000% cgo 4668787 4668491 -296 -0.006% compile 25163409 25164729 +1320 +0.005% cover 4658055 4658007 -48 -0.001% dist 3437783 3437727 -56 -0.002% doc 3883069 3883205 +136 +0.004% fix 3383254 3383070 -184 -0.005% link 6747559 6747023 -536 -0.008% nm 3793923 3793939 +16 +0.000% objdump 4256628 4256812 +184 +0.004% pack 2356328 2356144 -184 -0.008% pprof 14233370 14131910 -101460 -0.713% test2json 2638668 2638476 -192 -0.007% trace 13392065 13360781 -31284 -0.234% vet 7456388 7455588 -800 -0.011% total 132498256 132364392 -133864 -0.101% file before after Δ % compile/internal/ssa.a 35644590 35649482 +4892 +0.014% compile/internal/ssagen.a 4101250 4099858 -1392 -0.034% internal/edwards25519/field.a 226064 201718 -24346 -10.770% internal/nistec/fiat.a 1689922 1212254 -477668 -28.266% tls.a 3256798 3256800 +2 +0.000% big.a 1718552 1708518 -10034 -0.584% bits.a 107786 106762 -1024 -0.950% cmplx.a 169434 168214 -1220 -0.720% math.a 581302 578762 -2540 -0.437% netip.a 556096 555922 -174 -0.031% net.a 3286526 3286528 +2 +0.000% runtime.a 8644786 8644510 -276 -0.003% strconv.a 519098 518374 -724 -0.139% golang.org/x/crypto/internal/poly1305.a 115398 109546 -5852 -5.071% total 260913122 260392768 -520354 -0.199% Change-Id: I75b2bb7761fa5a0d0d032d4ebe3582d092ea77be Reviewed-on: https://go-review.googlesource.com/c/go/+/428556 Reviewed-by: Carlos Amedee Run-TryBot: Wayne Zuo Reviewed-by: David Chase TryBot-Result: Gopher Robot --- diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules index 431d95f096..74bc60a302 100644 --- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules @@ -38,6 +38,10 @@ (Mod8 x y) => (Select0 (DIVV (SignExt8to64 x) (SignExt8to64 y))) (Mod8u x y) => (Select0 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y))) +(Select0 (Add64carry x y c)) => (ADDV (ADDV x y) c) +(Select1 (Add64carry x y c)) => + (OR (SGTU x s:(ADDV x y)) (SGTU s (ADDV s c))) + // (x + y) / 2 with x>=y => (x - y) / 2 + y (Avg64u x y) => (ADDV (SRLVconst (SUBV x y) [1]) y) @@ -675,3 +679,7 @@ (GTZ (MOVVconst [c]) yes no) && c <= 0 => (First no yes) (GEZ (MOVVconst [c]) yes no) && c >= 0 => (First yes no) (GEZ (MOVVconst [c]) yes no) && c < 0 => (First no yes) + +// SGT/SGTU with known outcomes. +(SGT x x) => (MOVVconst [0]) +(SGTU x x) => (MOVVconst [0]) diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go index d2e7e8e061..e2d0d20efb 100644 --- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go +++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go @@ -3367,6 +3367,17 @@ func rewriteValueLOONG64_OpLOONG64SGT(v *Value) bool { v.AddArg(x) return true } + // match: (SGT x x) + // result: (MOVVconst [0]) + for { + x := v_0 + if x != v_1 { + break + } + v.reset(OpLOONG64MOVVconst) + v.AuxInt = int64ToAuxInt(0) + return true + } return false } func rewriteValueLOONG64_OpLOONG64SGTU(v *Value) bool { @@ -3389,6 +3400,17 @@ func rewriteValueLOONG64_OpLOONG64SGTU(v *Value) bool { v.AddArg(x) return true } + // match: (SGTU x x) + // result: (MOVVconst [0]) + for { + x := v_0 + if x != v_1 { + break + } + v.reset(OpLOONG64MOVVconst) + v.AuxInt = int64ToAuxInt(0) + return true + } return false } func rewriteValueLOONG64_OpLOONG64SGTUconst(v *Value) bool { @@ -6806,6 +6828,22 @@ func rewriteValueLOONG64_OpSelect0(v *Value) bool { v.AddArg(v0) return true } + // match: (Select0 (Add64carry x y c)) + // result: (ADDV (ADDV x y) c) + for { + t := v.Type + if v_0.Op != OpAdd64carry { + break + } + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpLOONG64ADDV) + v0 := b.NewValue0(v.Pos, OpLOONG64ADDV, t) + v0.AddArg2(x, y) + v.AddArg2(v0, c) + return true + } // match: (Select0 (DIVVU _ (MOVVconst [1]))) // result: (MOVVconst [0]) for { @@ -6918,6 +6956,28 @@ func rewriteValueLOONG64_OpSelect1(v *Value) bool { v.AddArg2(v0, v2) return true } + // match: (Select1 (Add64carry x y c)) + // result: (OR (SGTU x s:(ADDV x y)) (SGTU s (ADDV s c))) + for { + t := v.Type + if v_0.Op != OpAdd64carry { + break + } + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpLOONG64OR) + v0 := b.NewValue0(v.Pos, OpLOONG64SGTU, t) + s := b.NewValue0(v.Pos, OpLOONG64ADDV, t) + s.AddArg2(x, y) + v0.AddArg2(x, s) + v2 := b.NewValue0(v.Pos, OpLOONG64SGTU, t) + v3 := b.NewValue0(v.Pos, OpLOONG64ADDV, t) + v3.AddArg2(s, c) + v2.AddArg2(s, v3) + v.AddArg2(v0, v2) + return true + } // match: (Select1 (MULVU x (MOVVconst [-1]))) // result: (NEGV x) for { diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go index bafa385579..f4601c56cf 100644 --- a/src/cmd/compile/internal/ssagen/ssa.go +++ b/src/cmd/compile/internal/ssagen/ssa.go @@ -1692,12 +1692,12 @@ func (s *state) stmt(n ir.Node) { // Currently doesn't really work because (*p)[:len(*p)] appears here as: // tmp = len(*p) // (*p)[:tmp] - //if j != nil && (j.Op == OLEN && SameSafeExpr(j.Left, n.Left)) { + // if j != nil && (j.Op == OLEN && SameSafeExpr(j.Left, n.Left)) { // j = nil - //} - //if k != nil && (k.Op == OCAP && SameSafeExpr(k.Left, n.Left)) { + // } + // if k != nil && (k.Op == OCAP && SameSafeExpr(k.Left, n.Left)) { // k = nil - //} + // } if i == nil { skip |= skipPtr if j == nil { @@ -4730,8 +4730,8 @@ func InitTables() { func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2]) }, - sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64) - alias("math/bits", "Add", "math/bits", "Add64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchS390X, sys.ArchRISCV64) + sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64) + alias("math/bits", "Add", "math/bits", "Add64", p8...) addF("math/bits", "Sub64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2]) diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index 0620766f5a..bd23b90b9b 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -451,6 +451,7 @@ func Add(x, y, ci uint) (r, co uint) { func AddC(x, ci uint) (r, co uint) { // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP" // amd64:"NEGL","ADCQ","SBBQ","NEGQ" + // loong64: "ADDV", "SGTU" // ppc64: "ADDC", "ADDE", "ADDZE" // ppc64le: "ADDC", "ADDE", "ADDZE" // s390x:"ADDE","ADDC\t[$]-1," @@ -461,6 +462,7 @@ func AddC(x, ci uint) (r, co uint) { func AddZ(x, y uint) (r, co uint) { // arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP" // amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ" + // loong64: "ADDV", "SGTU" // ppc64: "ADDC", -"ADDE", "ADDZE" // ppc64le: "ADDC", -"ADDE", "ADDZE" // s390x:"ADDC",-"ADDC\t[$]-1," @@ -471,6 +473,7 @@ func AddZ(x, y uint) (r, co uint) { func AddR(x, y, ci uint) uint { // arm64:"ADDS","ADCS",-"ADD\t",-"CMP" // amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ" + // loong64: "ADDV", -"SGTU" // ppc64: "ADDC", "ADDE", -"ADDZE" // ppc64le: "ADDC", "ADDE", -"ADDZE" // s390x:"ADDE","ADDC\t[$]-1," @@ -492,6 +495,7 @@ func AddM(p, q, r *[3]uint) { func Add64(x, y, ci uint64) (r, co uint64) { // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP" // amd64:"NEGL","ADCQ","SBBQ","NEGQ" + // loong64: "ADDV", "SGTU" // ppc64: "ADDC", "ADDE", "ADDZE" // ppc64le: "ADDC", "ADDE", "ADDZE" // s390x:"ADDE","ADDC\t[$]-1," @@ -502,6 +506,7 @@ func Add64(x, y, ci uint64) (r, co uint64) { func Add64C(x, ci uint64) (r, co uint64) { // arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP" // amd64:"NEGL","ADCQ","SBBQ","NEGQ" + // loong64: "ADDV", "SGTU" // ppc64: "ADDC", "ADDE", "ADDZE" // ppc64le: "ADDC", "ADDE", "ADDZE" // s390x:"ADDE","ADDC\t[$]-1," @@ -512,6 +517,7 @@ func Add64C(x, ci uint64) (r, co uint64) { func Add64Z(x, y uint64) (r, co uint64) { // arm64:"ADDS","ADC",-"ADCS",-"ADD\t",-"CMP" // amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ" + // loong64: "ADDV", "SGTU" // ppc64: "ADDC", -"ADDE", "ADDZE" // ppc64le: "ADDC", -"ADDE", "ADDZE" // s390x:"ADDC",-"ADDC\t[$]-1," @@ -522,6 +528,7 @@ func Add64Z(x, y uint64) (r, co uint64) { func Add64R(x, y, ci uint64) uint64 { // arm64:"ADDS","ADCS",-"ADD\t",-"CMP" // amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ" + // loong64: "ADDV", -"SGTU" // ppc64: "ADDC", "ADDE", -"ADDZE" // ppc64le: "ADDC", "ADDE", -"ADDZE" // s390x:"ADDE","ADDC\t[$]-1,"