Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: intrinsify Mul64 on riscv64
author Meng Zhuo <mzh@golangcn.org>
Tue, 22 Jun 2021 11:20:03 +0000 (11:20 +0000)
committer Meng Zhuo <mzh@golangcn.org>
Mon, 16 Aug 2021 13:50:11 +0000 (13:50 +0000)
According to the RISC-V instruction set manual v2.2, Sec 6.1,
a MULHU followed by a MUL can be fused into a single multiply by the microarchitecture.

Benchstat on HiFive Unmatched:
name          old time/op    new time/op    delta
Hash8Bytes       245ns ± 3%     186ns ± 4%  -23.99%  (p=0.000 n=10+10)
Hash320Bytes    1.94µs ± 1%    1.31µs ± 1%  -32.38%  (p=0.000 n=9+10)
Hash1K          5.84µs ± 0%    3.84µs ± 0%  -34.20%  (p=0.000 n=10+9)
Hash8K          45.3µs ± 0%    29.4µs ± 0%  -35.04%  (p=0.000 n=10+10)

name          old speed      new speed      delta
Hash8Bytes    32.7MB/s ± 3%  43.0MB/s ± 4%  +31.61%  (p=0.000 n=10+10)
Hash320Bytes   165MB/s ± 1%   244MB/s ± 1%  +47.88%  (p=0.000 n=9+10)
Hash1K         175MB/s ± 0%   266MB/s ± 0%  +51.98%  (p=0.000 n=10+9)
Hash8K         181MB/s ± 0%   279MB/s ± 0%  +53.94%  (p=0.000 n=10+10)

Change-Id: I3561495d02a4a0ad8578e9b9819bf0a4eaca5d12
Reviewed-on: https://go-review.googlesource.com/c/go/+/329970
Reviewed-by: Joel Sing <joel@sing.id.au>
Run-TryBot: Joel Sing <joel@sing.id.au>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Meng Zhuo <mzh@golangcn.org>

src/cmd/compile/internal/riscv64/ssa.go
src/cmd/compile/internal/ssa/gen/RISCV64.rules
src/cmd/compile/internal/ssa/gen/RISCV64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteRISCV64.go
src/cmd/compile/internal/ssagen/ssa.go
test/codegen/mathbits.go
test/run.go

index 64a9b3b33b9aca34cfd15284ff14eb765a67d52a..c635d93b719d291c8019f0f4afcb8260893b5d3b 100644 (file)
@@ -282,6 +282,21 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                p.Reg = r1
                p.To.Type = obj.TYPE_REG
                p.To.Reg = r
+       case ssa.OpRISCV64LoweredMuluhilo:
+               r0 := v.Args[0].Reg()
+               r1 := v.Args[1].Reg()
+               p := s.Prog(riscv.AMULHU)
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = r1
+               p.Reg = r0
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = v.Reg0()
+               p1 := s.Prog(riscv.AMUL)
+               p1.From.Type = obj.TYPE_REG
+               p1.From.Reg = r1
+               p1.Reg = r0
+               p1.To.Type = obj.TYPE_REG
+               p1.To.Reg = v.Reg1()
        case ssa.OpRISCV64FSQRTS, ssa.OpRISCV64FNEGS, ssa.OpRISCV64FSQRTD, ssa.OpRISCV64FNEGD,
                ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVDX,
                ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS,
index 9cdd62edbe01c004f4b79951d38a4d5cd123a2a3..b21ebe6abbd1cf157116cde39361d582bfb7f6b9 100644 (file)
@@ -29,6 +29,7 @@
 (Sub64F ...) => (FSUBD ...)
 
 (Mul64 ...) => (MUL  ...)
+(Mul64uhilo ...) => (LoweredMuluhilo ...)
 (Mul32 ...) => (MULW ...)
 (Mul16 x y) => (MULW (SignExt16to32 x) (SignExt16to32 y))
 (Mul8 x y)  => (MULW (SignExt8to32 x)  (SignExt8to32 y))
index 0774d4c654f95728d024eee48fef23444567a82b..cb9051f954e82e3798e6c6c965aa0dd1082c2d9e 100644 (file)
@@ -123,6 +123,7 @@ func init() {
                gp01     = regInfo{outputs: []regMask{gpMask}}
                gp11     = regInfo{inputs: []regMask{gpMask}, outputs: []regMask{gpMask}}
                gp21     = regInfo{inputs: []regMask{gpMask, gpMask}, outputs: []regMask{gpMask}}
+               gp22     = regInfo{inputs: []regMask{gpMask, gpMask}, outputs: []regMask{gpMask, gpMask}}
                gpload   = regInfo{inputs: []regMask{gpspsbMask, 0}, outputs: []regMask{gpMask}}
                gp11sb   = regInfo{inputs: []regMask{gpspsbMask}, outputs: []regMask{gpMask}}
                gpxchg   = regInfo{inputs: []regMask{gpspsbgMask, gpgMask}, outputs: []regMask{gpMask}}
@@ -157,6 +158,8 @@ func init() {
                {name: "MULW", argLength: 2, reg: gp21, asm: "MULW", commutative: true, typ: "Int32"},
                {name: "MULH", argLength: 2, reg: gp21, asm: "MULH", commutative: true, typ: "Int64"},
                {name: "MULHU", argLength: 2, reg: gp21, asm: "MULHU", commutative: true, typ: "UInt64"},
+               {name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true}, // arg0 * arg1, return (hi, lo)
+
                {name: "DIV", argLength: 2, reg: gp21, asm: "DIV", typ: "Int64"}, // arg0 / arg1
                {name: "DIVU", argLength: 2, reg: gp21, asm: "DIVU", typ: "UInt64"},
                {name: "DIVW", argLength: 2, reg: gp21, asm: "DIVW", typ: "Int32"},
index df15c2edda49450934b617ae96d7c01753dfca17..7893ce837e7ba5bd24a3da2cc951572db5f8ad5b 100644 (file)
@@ -2069,6 +2069,7 @@ const (
        OpRISCV64MULW
        OpRISCV64MULH
        OpRISCV64MULHU
+       OpRISCV64LoweredMuluhilo
        OpRISCV64DIV
        OpRISCV64DIVU
        OpRISCV64DIVW
@@ -27603,6 +27604,21 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:            "LoweredMuluhilo",
+               argLen:          2,
+               resultNotInArgs: true,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+                               {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+                       },
+                       outputs: []outputInfo{
+                               {0, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+                               {1, 1006632948}, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+                       },
+               },
+       },
        {
                name:   "DIV",
                argLen: 2,
index 431fb1aaf66e0dd288704707d9ed909780533d4b..f315c0d3a8a5b1793ec5fd6ba65a479b53bf96f5 100644 (file)
@@ -356,6 +356,9 @@ func rewriteValueRISCV64(v *Value) bool {
        case OpMul64F:
                v.Op = OpRISCV64FMULD
                return true
+       case OpMul64uhilo:
+               v.Op = OpRISCV64LoweredMuluhilo
+               return true
        case OpMul8:
                return rewriteValueRISCV64_OpMul8(v)
        case OpNeg16:
index b0f2585e3ab5abf6c70fba31bb74debbfeef925e..237135d5c7007733b19f593937d92f98a21bb4b6 100644 (file)
@@ -4505,9 +4505,9 @@ func InitTables() {
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1])
                },
-               sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.MIPS64)
-       alias("math/bits", "Mul", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchS390X, sys.ArchMIPS64, sys.ArchMIPS64LE)
-       alias("runtime/internal/math", "Mul64", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchS390X, sys.ArchMIPS64, sys.ArchMIPS64LE)
+               sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.MIPS64, sys.RISCV64)
+       alias("math/bits", "Mul", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchS390X, sys.ArchMIPS64, sys.ArchMIPS64LE, sys.ArchRISCV64)
+       alias("runtime/internal/math", "Mul64", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchS390X, sys.ArchMIPS64, sys.ArchMIPS64LE, sys.ArchRISCV64)
        addF("math/bits", "Add64",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
index 03012eff5d8444a2e71e5f0a49ccc8e850ae004c..aecd84a78bb1c215d0fd4d22c5ed6a52984fb952 100644 (file)
@@ -710,6 +710,7 @@ func Mul64(x, y uint64) (hi, lo uint64) {
        // ppc64le:"MULHDU","MULLD"
        // s390x:"MLGR"
        // mips64: "MULVU"
+       // riscv64:"MULHU","MUL"
        return bits.Mul64(x, y)
 }
 
index 6296234d56588ff93a1d153414962c6c6d0a96cd..5d5adc3623c0334c8fe00119e0963d61759209e2 100644 (file)
@@ -1756,6 +1756,7 @@ var (
                "ppc64le": {"GOPPC64", "power8", "power9"},
                "s390x":   {},
                "wasm":    {},
+               "riscv64": {},
        }
 )