Simple single-byte loop count for now, to be further improved in future
CLs.
Benchmark on linux/riscv64 (HiFive Unleashed):
name old time/op new time/op delta
CountSingle/10-4 190ns ± 1% 145ns ± 1% -23.66% (p=0.000 n=10+9)
CountSingle/32-4 422ns ± 1% 268ns ± 0% -36.43% (p=0.000 n=10+7)
CountSingle/4K-4 43.3µs ± 0% 23.8µs ± 0% -45.09% (p=0.000 n=8+10)
CountSingle/4M-4 54.2ms ± 1% 33.3ms ± 1% -38.48% (p=0.000 n=10+10)
CountSingle/64M-4 1.52s ± 1% 1.20s ± 1% -21.20% (p=0.000 n=9+9)
name old speed new speed delta
CountSingle/10-4 52.7MB/s ± 1% 69.1MB/s ± 1% +31.03% (p=0.000 n=10+9)
CountSingle/32-4 75.9MB/s ± 1% 119.5MB/s ± 0% +57.34% (p=0.000 n=10+8)
CountSingle/4K-4 94.6MB/s ± 0% 172.2MB/s ± 0% +82.10% (p=0.000 n=8+10)
CountSingle/4M-4 77.4MB/s ± 1% 125.8MB/s ± 1% +62.54% (p=0.000 n=10+10)
CountSingle/64M-4 44.2MB/s ± 1% 56.1MB/s ± 1% +26.91% (p=0.000 n=9+9)
Change-Id: I2a6bd50d22d5f598517bb3c5a50066c54280cac5
Reviewed-on: https://go-review.googlesource.com/c/go/+/263541
Trust: Tobias Klauser <tobias.klauser@gmail.com>
Run-TryBot: Tobias Klauser <tobias.klauser@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Joel Sing <joel@sing.id.au>
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build !amd64,!arm,!arm64,!ppc64le,!ppc64,!s390x
+// +build !amd64,!arm,!arm64,!ppc64le,!ppc64,!riscv64,!s390x
package bytealg
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build amd64 arm arm64 ppc64le ppc64 s390x
+// +build amd64 arm arm64 ppc64le ppc64 riscv64 s390x
package bytealg
--- /dev/null
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Count(SB),NOSPLIT,$0-40
+ MOV b_base+0(FP), A1
+ MOV b_len+8(FP), A2
+ MOVBU c+24(FP), A3 // byte to count
+ MOV ZERO, A4 // count
+ ADD A1, A2 // end
+
+loop:
+ BEQ A1, A2, done
+ MOVBU (A1), A5
+ ADD $1, A1
+ BNE A3, A5, loop
+ ADD $1, A4
+ JMP loop
+
+done:
+ MOV A4, ret+32(FP)
+ RET
+
+TEXT ·CountString(SB),NOSPLIT,$0-32
+ MOV s_base+0(FP), A1
+ MOV s_len+8(FP), A2
+ MOVBU c+16(FP), A3 // byte to count
+ MOV ZERO, A4 // count
+ ADD A1, A2 // end
+
+loop:
+ BEQ A1, A2, done
+ MOVBU (A1), A5
+ ADD $1, A1
+ BNE A3, A5, loop
+ ADD $1, A4
+ JMP loop
+
+done:
+ MOV A4, ret+24(FP)
+ RET