]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile,internal/cpu,runtime: intrinsify math/bits.OnesCount on riscv64
authorJoel Sing <joel@sing.id.au>
Wed, 19 Mar 2025 12:57:23 +0000 (23:57 +1100)
committerJoel Sing <joel@sing.id.au>
Thu, 1 May 2025 12:57:41 +0000 (05:57 -0700)
For riscv64/rva22u64 and above, we can intrinsify math/bits.OnesCount
using the CPOP/CPOPW machine instructions. Since the native Go
implementation of OnesCount is relatively expensive, it is also
worth emitting a check for Zbb support when compiled for rva20u64.

On a Banana Pi F3, with GORISCV64=rva22u64:

              │     oc.1     │                oc.2                 │
              │    sec/op    │   sec/op     vs base                │
OnesCount-8     16.930n ± 0%   4.389n ± 0%  -74.08% (p=0.000 n=10)
OnesCount8-8     5.642n ± 0%   5.016n ± 0%  -11.10% (p=0.000 n=10)
OnesCount16-8    9.404n ± 0%   5.015n ± 0%  -46.67% (p=0.000 n=10)
OnesCount32-8   13.165n ± 0%   4.388n ± 0%  -66.67% (p=0.000 n=10)
OnesCount64-8   16.300n ± 0%   4.388n ± 0%  -73.08% (p=0.000 n=10)
geomean          11.40n        4.629n       -59.40%

On a Banana Pi F3, compiled with GORISCV64=rva20u64 and with Zbb
detection enabled:

              │     oc.3     │                oc.4                 │
              │    sec/op    │   sec/op     vs base                │
OnesCount-8     16.930n ± 0%   5.643n ± 0%  -66.67% (p=0.000 n=10)
OnesCount8-8     5.642n ± 0%   5.642n ± 0%        ~ (p=0.447 n=10)
OnesCount16-8   10.030n ± 0%   6.896n ± 0%  -31.25% (p=0.000 n=10)
OnesCount32-8   13.170n ± 0%   5.642n ± 0%  -57.16% (p=0.000 n=10)
OnesCount64-8   16.300n ± 0%   5.642n ± 0%  -65.39% (p=0.000 n=10)
geomean          11.55n        5.873n       -49.16%

On a Banana Pi F3, compiled with GORISCV64=rva20u64 but with Zbb
detection disabled:

              │    oc.3     │                oc.5                 │
              │   sec/op    │   sec/op     vs base                │
OnesCount-8     16.93n ± 0%   29.47n ± 0%  +74.07% (p=0.000 n=10)
OnesCount8-8    5.642n ± 0%   5.643n ± 0%        ~ (p=0.191 n=10)
OnesCount16-8   10.03n ± 0%   15.05n ± 0%  +50.05% (p=0.000 n=10)
OnesCount32-8   13.17n ± 0%   18.18n ± 0%  +38.04% (p=0.000 n=10)
OnesCount64-8   16.30n ± 0%   21.94n ± 0%  +34.60% (p=0.000 n=10)
geomean         11.55n        15.84n       +37.16%

For hardware without Zbb, this adds ~5ns overhead, while for hardware
with Zbb we achieve a performance gain up of up to 11ns. It is worth
noting that OnesCount8 is cheap enough that it is preferable to stick
with the generic version in this case.

Change-Id: Id657e40e0dd1b1ab8cc0fe0f8a68df4c9f2d7da5
Reviewed-on: https://go-review.googlesource.com/c/go/+/660856
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Meng Zhuo <mengzhuo1203@gmail.com>
Reviewed-by: Mark Ryan <markdryan@rivosinc.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

18 files changed:
src/cmd/compile/internal/ir/symtab.go
src/cmd/compile/internal/riscv64/ssa.go
src/cmd/compile/internal/ssa/_gen/RISCV64.rules
src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteRISCV64.go
src/cmd/compile/internal/ssagen/intrinsics.go
src/cmd/compile/internal/ssagen/intrinsics_test.go
src/cmd/compile/internal/ssagen/ssa.go
src/cmd/compile/internal/typecheck/_builtin/runtime.go
src/cmd/compile/internal/typecheck/builtin.go
src/cmd/internal/goobj/builtinlist.go
src/internal/cpu/cpu.go
src/internal/cpu/cpu_riscv64.go
src/internal/cpu/cpu_riscv64_linux.go
src/runtime/cpuflags.go
src/runtime/proc.go
test/codegen/mathbits.go

index 820916316cbff8261e79a60c5eccea8ea101b3ee..00b07cb45c3dc8ed79e3d2e26dc5c8bd59d07d8f 100644 (file)
@@ -64,6 +64,7 @@ type symsStruct struct {
        Loong64HasLAMCAS *obj.LSym
        Loong64HasLAM_BH *obj.LSym
        Loong64HasLSX    *obj.LSym
+       RISCV64HasZbb    *obj.LSym
        X86HasFMA        *obj.LSym
        X86HasPOPCNT     *obj.LSym
        X86HasSSE41      *obj.LSym
index 4428d359a8eea594e3eb6fa7468893b95c4328ad..21edcabc58b3a85bc7e36622989190ebad132c31 100644 (file)
@@ -420,7 +420,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS,
                ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD,
                ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CLZ, ssa.OpRISCV64CLZW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW,
-               ssa.OpRISCV64REV8:
+               ssa.OpRISCV64REV8, ssa.OpRISCV64CPOP, ssa.OpRISCV64CPOPW:
                p := s.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_REG
                p.From.Reg = v.Args[0].Reg()
index b8b0429de28ea9cd0e1573e716ad746b35ee877a..80061ada2cfdaf1864709a3b71637108c6fd7825 100644 (file)
 (Bswap32 <t> x) => (SRLI [32] (REV8 <t> x))
 (Bswap16 <t> x) => (SRLI [48] (REV8 <t> x))
 
+// Population count (note that these will be emitted with guards for rva20u64).
+(PopCount64 ...) => (CPOP  ...)
+(PopCount32 ...) => (CPOPW ...)
+(PopCount16 x) => (CPOP (ZeroExt16to64 x))
+(PopCount8  x) => (CPOP (ZeroExt8to64  x))
+
 (Less64  ...) => (SLT  ...)
 (Less32  x y) => (SLT  (SignExt32to64 x) (SignExt32to64 y))
 (Less16  x y) => (SLT  (SignExt16to64 x) (SignExt16to64 y))
index 86412ce8a6769824029f9ed392596243213e097f..8cb042a604bee693247093c62b3c65955f5aaf3e 100644 (file)
@@ -231,6 +231,8 @@ func init() {
                {name: "ANDI", argLength: 1, reg: gp11, asm: "ANDI", aux: "Int64"},      // arg0 & auxint
                {name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"},                      // count leading zeros
                {name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"},                    // count leading zeros of least significant word
+               {name: "CPOP", argLength: 1, reg: gp11, asm: "CPOP"},                    // count set bits
+               {name: "CPOPW", argLength: 1, reg: gp11, asm: "CPOPW"},                  // count set bits in least significant word
                {name: "CTZ", argLength: 1, reg: gp11, asm: "CTZ"},                      // count trailing zeros
                {name: "CTZW", argLength: 1, reg: gp11, asm: "CTZW"},                    // count trailing zeros of least significant word
                {name: "NOT", argLength: 1, reg: gp11, asm: "NOT"},                      // ^arg0
index 6eeb90721b03fa29188ccca364e5b6af2c4194dd..20dfb05741c2402cd3193084194458231c7e175e 100644 (file)
@@ -2514,6 +2514,8 @@ const (
        OpRISCV64ANDI
        OpRISCV64CLZ
        OpRISCV64CLZW
+       OpRISCV64CPOP
+       OpRISCV64CPOPW
        OpRISCV64CTZ
        OpRISCV64CTZW
        OpRISCV64NOT
@@ -33887,6 +33889,32 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "CPOP",
+               argLen: 1,
+               asm:    riscv.ACPOP,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+                       },
+                       outputs: []outputInfo{
+                               {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+                       },
+               },
+       },
+       {
+               name:   "CPOPW",
+               argLen: 1,
+               asm:    riscv.ACPOPW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+                       },
+                       outputs: []outputInfo{
+                               {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
+                       },
+               },
+       },
        {
                name:   "CTZ",
                argLen: 1,
index d0e2c909e0d58e1522bbd856f0683c5c3b66a1b1..4e53ae5fe63f30070d21a9f69100c24a2288cbdd 100644 (file)
@@ -487,6 +487,16 @@ func rewriteValueRISCV64(v *Value) bool {
                return true
        case OpPanicBounds:
                return rewriteValueRISCV64_OpPanicBounds(v)
+       case OpPopCount16:
+               return rewriteValueRISCV64_OpPopCount16(v)
+       case OpPopCount32:
+               v.Op = OpRISCV64CPOPW
+               return true
+       case OpPopCount64:
+               v.Op = OpRISCV64CPOP
+               return true
+       case OpPopCount8:
+               return rewriteValueRISCV64_OpPopCount8(v)
        case OpPubBarrier:
                v.Op = OpRISCV64LoweredPubBarrier
                return true
@@ -3458,6 +3468,36 @@ func rewriteValueRISCV64_OpPanicBounds(v *Value) bool {
        }
        return false
 }
+func rewriteValueRISCV64_OpPopCount16(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (PopCount16 x)
+       // result: (CPOP (ZeroExt16to64 x))
+       for {
+               x := v_0
+               v.reset(OpRISCV64CPOP)
+               v0 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueRISCV64_OpPopCount8(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (PopCount8 x)
+       // result: (CPOP (ZeroExt8to64 x))
+       for {
+               x := v_0
+               v.reset(OpRISCV64CPOP)
+               v0 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+}
 func rewriteValueRISCV64_OpRISCV64ADD(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
index 86ab98118d8b1814cdb273916b3a5f17d5efa88d..78350723da2bf76370ed91739cdc4b6713689048 100644 (file)
@@ -1129,12 +1129,49 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
                }
        }
 
+       makeOnesCountRISCV64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+               return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+                       if cfg.goriscv64 >= 22 {
+                               return s.newValue1(op, types.Types[types.TINT], args[0])
+                       }
+
+                       addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.RISCV64HasZbb, s.sb)
+                       v := s.load(types.Types[types.TBOOL], addr)
+                       b := s.endBlock()
+                       b.Kind = ssa.BlockIf
+                       b.SetControl(v)
+                       bTrue := s.f.NewBlock(ssa.BlockPlain)
+                       bFalse := s.f.NewBlock(ssa.BlockPlain)
+                       bEnd := s.f.NewBlock(ssa.BlockPlain)
+                       b.AddEdgeTo(bTrue)
+                       b.AddEdgeTo(bFalse)
+                       b.Likely = ssa.BranchLikely // Majority of RISC-V support Zbb.
+
+                       // We have the intrinsic - use it directly.
+                       s.startBlock(bTrue)
+                       s.vars[n] = s.newValue1(op, types.Types[types.TINT], args[0])
+                       s.endBlock().AddEdgeTo(bEnd)
+
+                       // Call the pure Go version.
+                       s.startBlock(bFalse)
+                       s.vars[n] = s.callResult(n, callNormal) // types.Types[TINT]
+                       s.endBlock().AddEdgeTo(bEnd)
+
+                       // Merge results.
+                       s.startBlock(bEnd)
+                       return s.variable(n, types.Types[types.TINT])
+               }
+       }
+
        addF("math/bits", "OnesCount64",
                makeOnesCountAMD64(ssa.OpPopCount64),
                sys.AMD64)
        addF("math/bits", "OnesCount64",
                makeOnesCountLoong64(ssa.OpPopCount64),
                sys.Loong64)
+       addF("math/bits", "OnesCount64",
+               makeOnesCountRISCV64(ssa.OpPopCount64),
+               sys.RISCV64)
        addF("math/bits", "OnesCount64",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpPopCount64, types.Types[types.TINT], args[0])
@@ -1146,6 +1183,9 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
        addF("math/bits", "OnesCount32",
                makeOnesCountLoong64(ssa.OpPopCount32),
                sys.Loong64)
+       addF("math/bits", "OnesCount32",
+               makeOnesCountRISCV64(ssa.OpPopCount32),
+               sys.RISCV64)
        addF("math/bits", "OnesCount32",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpPopCount32, types.Types[types.TINT], args[0])
@@ -1157,6 +1197,9 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
        addF("math/bits", "OnesCount16",
                makeOnesCountLoong64(ssa.OpPopCount16),
                sys.Loong64)
+       addF("math/bits", "OnesCount16",
+               makeOnesCountRISCV64(ssa.OpPopCount16),
+               sys.RISCV64)
        addF("math/bits", "OnesCount16",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue1(ssa.OpPopCount16, types.Types[types.TINT], args[0])
@@ -1167,6 +1210,13 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
                        return s.newValue1(ssa.OpPopCount8, types.Types[types.TINT], args[0])
                },
                sys.S390X, sys.PPC64, sys.Wasm)
+
+       if cfg.goriscv64 >= 22 {
+               addF("math/bits", "OnesCount8",
+                       makeOnesCountRISCV64(ssa.OpPopCount8),
+                       sys.RISCV64)
+       }
+
        alias("math/bits", "OnesCount", "math/bits", "OnesCount64", p8...)
 
        addF("math/bits", "Mul64",
index e6275734f2c57cfb422841e48a045c8611c5144a..5d3b0519b7609b40b10b52807b6945c7439f4b20 100644 (file)
@@ -1114,6 +1114,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"riscv64", "internal/runtime/sys", "GetClosurePtr"}:               struct{}{},
        {"riscv64", "internal/runtime/sys", "Len64"}:                       struct{}{},
        {"riscv64", "internal/runtime/sys", "Len8"}:                        struct{}{},
+       {"riscv64", "internal/runtime/sys", "OnesCount64"}:                 struct{}{},
        {"riscv64", "internal/runtime/sys", "TrailingZeros32"}:             struct{}{},
        {"riscv64", "internal/runtime/sys", "TrailingZeros64"}:             struct{}{},
        {"riscv64", "internal/runtime/sys", "TrailingZeros8"}:              struct{}{},
@@ -1131,6 +1132,11 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"riscv64", "math/bits", "Len8"}:                                   struct{}{},
        {"riscv64", "math/bits", "Mul"}:                                    struct{}{},
        {"riscv64", "math/bits", "Mul64"}:                                  struct{}{},
+       {"riscv64", "math/bits", "OnesCount"}:                              struct{}{},
+       {"riscv64", "math/bits", "OnesCount16"}:                            struct{}{},
+       {"riscv64", "math/bits", "OnesCount32"}:                            struct{}{},
+       {"riscv64", "math/bits", "OnesCount64"}:                            struct{}{},
+       {"riscv64", "math/bits", "OnesCount8"}:                             struct{}{},
        {"riscv64", "math/bits", "ReverseBytes16"}:                         struct{}{},
        {"riscv64", "math/bits", "ReverseBytes32"}:                         struct{}{},
        {"riscv64", "math/bits", "ReverseBytes64"}:                         struct{}{},
index 0b77a1334ffdb8e1c550253a5eae1edf49340e9f..acb037dd56f0326a979d2c45ad0dd85f174cdc57 100644 (file)
@@ -160,6 +160,7 @@ func InitConfig() {
        ir.Syms.Loong64HasLAMCAS = typecheck.LookupRuntimeVar("loong64HasLAMCAS") // bool
        ir.Syms.Loong64HasLAM_BH = typecheck.LookupRuntimeVar("loong64HasLAM_BH") // bool
        ir.Syms.Loong64HasLSX = typecheck.LookupRuntimeVar("loong64HasLSX")       // bool
+       ir.Syms.RISCV64HasZbb = typecheck.LookupRuntimeVar("riscv64HasZbb")       // bool
        ir.Syms.Staticuint64s = typecheck.LookupRuntimeVar("staticuint64s")
        ir.Syms.Typedmemmove = typecheck.LookupRuntimeFunc("typedmemmove")
        ir.Syms.Udiv = typecheck.LookupRuntimeVar("udiv")                 // asm func with special ABI
index 8a92c490612f2b50726e5b78b2dffbb8463bf578..a1397b32b352412e9eeeaaf5b6de5f1be19e8db3 100644 (file)
@@ -294,5 +294,6 @@ var arm64HasATOMICS bool
 var loong64HasLAMCAS bool
 var loong64HasLAM_BH bool
 var loong64HasLSX bool
+var riscv64HasZbb bool
 
 func asanregisterglobals(unsafe.Pointer, uintptr)
index 4c12ce6220daa209fd483d38538f3a4a429a8aa6..f3ab6766ec8b2be86fb769e5efd0d727e26a6c01 100644 (file)
@@ -242,6 +242,7 @@ var runtimeDecls = [...]struct {
        {"loong64HasLAMCAS", varTag, 6},
        {"loong64HasLAM_BH", varTag, 6},
        {"loong64HasLSX", varTag, 6},
+       {"riscv64HasZbb", varTag, 6},
        {"asanregisterglobals", funcTag, 130},
 }
 
index 3e550d8dd9763b8f3df6e881f974ecb3148fc872..9e21544391b47d6ab0fb80f0335ba02e8ec2536e 100644 (file)
@@ -221,6 +221,7 @@ var builtins = [...]struct {
        {"runtime.loong64HasLAMCAS", 0},
        {"runtime.loong64HasLAM_BH", 0},
        {"runtime.loong64HasLSX", 0},
+       {"runtime.riscv64HasZbb", 0},
        {"runtime.asanregisterglobals", 1},
        {"runtime.deferproc", 1},
        {"runtime.deferprocStack", 1},
index 4c945e4b96d242283a7194c7e753d65df64c8886..e07463f87025250d9fb436a3115d3e187937c178 100644 (file)
@@ -145,6 +145,7 @@ var RISCV64 struct {
        _                 CacheLinePad
        HasFastMisaligned bool // Fast misaligned accesses
        HasV              bool // Vector extension compatible with RVV 1.0
+       HasZbb            bool // Basic bit-manipulation extension
        _                 CacheLinePad
 }
 
index e6e532c7e7e18f5c5279117fccee1bc521b7639c..0fe1704855fa0c48be8ef2c97727837f6c6a51d4 100644 (file)
@@ -12,6 +12,7 @@ func doinit() {
        options = []option{
                {Name: "fastmisaligned", Feature: &RISCV64.HasFastMisaligned},
                {Name: "v", Feature: &RISCV64.HasV},
+               {Name: "zbb", Feature: &RISCV64.HasZbb},
        }
        osInit()
 }
index a076d3e33ce2a4d7ce7f5075c2e0423a168c5fe7..b67bdf58766f48bd5e8caeb03b7580a95562de0a 100644 (file)
@@ -50,6 +50,7 @@ const (
        // Copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go.
        riscv_HWPROBE_KEY_IMA_EXT_0   = 0x4
        riscv_HWPROBE_IMA_V           = 0x4
+       riscv_HWPROBE_EXT_ZBB         = 0x10
        riscv_HWPROBE_KEY_CPUPERF_0   = 0x5
        riscv_HWPROBE_MISALIGNED_FAST = 0x3
        riscv_HWPROBE_MISALIGNED_MASK = 0x7
@@ -83,6 +84,7 @@ func osInit() {
        if pairs[0].key != -1 {
                v := uint(pairs[0].value)
                RISCV64.HasV = isSet(v, riscv_HWPROBE_IMA_V)
+               RISCV64.HasZbb = isSet(v, riscv_HWPROBE_EXT_ZBB)
        }
        if pairs[1].key != -1 {
                v := pairs[1].value & riscv_HWPROBE_MISALIGNED_MASK
index 06424642c71ba4abb641393e4473a14bbf7963c9..bd1cb328d37b87e2f1097758888924a63cf59342 100644 (file)
@@ -38,4 +38,6 @@ var (
        loong64HasLAMCAS bool
        loong64HasLAM_BH bool
        loong64HasLSX    bool
+
+       riscv64HasZbb bool
 )
index db7a5b2bb1c654568139595b070de4c9dab24c3f..6929c70fb7397ff1aeaf524d39d08742adcf6a03 100644 (file)
@@ -778,6 +778,9 @@ func cpuinit(env string) {
                loong64HasLAMCAS = cpu.Loong64.HasLAMCAS
                loong64HasLAM_BH = cpu.Loong64.HasLAM_BH
                loong64HasLSX = cpu.Loong64.HasLSX
+
+       case "riscv64":
+               riscv64HasZbb = cpu.RISCV64.HasZbb
        }
 }
 
index e9dfbb1443e2e360088d35080d7d02f83ccce094..c7ba357d09b97a6f640abd6310e6ff82472b25b3 100644 (file)
@@ -181,8 +181,9 @@ func OnesCount(n uint) int {
        // amd64:"POPCNTQ"
        // arm64:"VCNT","VUADDLV"
        // loong64:"VPCNTV"
-       // s390x:"POPCNT"
        // ppc64x:"POPCNTD"
+       // riscv64:"CPOP\t"
+       // s390x:"POPCNT"
        // wasm:"I64Popcnt"
        return bits.OnesCount(n)
 }
@@ -192,8 +193,9 @@ func OnesCount64(n uint64) int {
        // amd64:"POPCNTQ"
        // arm64:"VCNT","VUADDLV"
        // loong64:"VPCNTV"
-       // s390x:"POPCNT"
        // ppc64x:"POPCNTD"
+       // riscv64:"CPOP\t"
+       // s390x:"POPCNT"
        // wasm:"I64Popcnt"
        return bits.OnesCount64(n)
 }
@@ -203,8 +205,9 @@ func OnesCount32(n uint32) int {
        // amd64:"POPCNTL"
        // arm64:"VCNT","VUADDLV"
        // loong64:"VPCNTW"
-       // s390x:"POPCNT"
        // ppc64x:"POPCNTW"
+       // riscv64:"CPOPW"
+       // s390x:"POPCNT"
        // wasm:"I64Popcnt"
        return bits.OnesCount32(n)
 }
@@ -214,15 +217,17 @@ func OnesCount16(n uint16) int {
        // amd64:"POPCNTL"
        // arm64:"VCNT","VUADDLV"
        // loong64:"VPCNTH"
-       // s390x:"POPCNT"
        // ppc64x:"POPCNTW"
+       // riscv64:"CPOP\t"
+       // s390x:"POPCNT"
        // wasm:"I64Popcnt"
        return bits.OnesCount16(n)
 }
 
 func OnesCount8(n uint8) int {
-       // s390x:"POPCNT"
        // ppc64x:"POPCNTB"
+       // riscv64/rva22u64,riscv64/rva23u64:"CPOP\t"
+       // s390x:"POPCNT"
        // wasm:"I64Popcnt"
        return bits.OnesCount8(n)
 }