Cypherpunks repositories - gostls13.git/commitdiff
unicode/utf8: speedup RuneCount
author Cuong Manh Le <cuong.manhle.vn@gmail.com>
Fri, 13 Sep 2024 03:15:51 +0000 (10:15 +0700)
committer Gopher Robot <gobot@golang.org>
Sun, 15 Sep 2024 02:05:37 +0000 (02:05 +0000)
CL 612617 sped up RuneCountInString, so we can now use it to
speed up RuneCount, too.

name                         old time/op    new time/op    delta
RuneCountTenASCIIChars-8       8.69ns ± 1%    3.59ns ± 2%  -58.66%  (p=0.000 n=9+9)
RuneCountTenJapaneseChars-8    49.8ns ± 2%    40.9ns ± 0%  -17.94%  (p=0.000 n=10+8)

Change-Id: I311750c00efc79af35fb0ca3b482a5d94e0a7977
Reviewed-on: https://go-review.googlesource.com/c/go/+/612955
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Auto-Submit: Cuong Manh Le <cuong.manhle.vn@gmail.com>
Reviewed-by: Tim King <taking@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/unicode/utf8/utf8.go

index 9743b742580c907f24eb6d2d8be0e9ac172387c0..180c008ed5b05eeb7b7a96a8aa89ba4cff68743c 100644 (file)
@@ -414,35 +414,11 @@ func appendRuneNonASCII(p []byte, r rune) []byte {
 func RuneCount(p []byte) int {
        np := len(p)
        var n int
-       for i := 0; i < np; {
-               n++
-               c := p[i]
-               if c < RuneSelf {
-                       // ASCII fast path
-                       i++
-                       continue
-               }
-               x := first[c]
-               if x == xx {
-                       i++ // invalid.
-                       continue
-               }
-               size := int(x & 7)
-               if i+size > np {
-                       i++ // Short or invalid.
-                       continue
+       for ; n < np; n++ {
+               if c := p[n]; c >= RuneSelf {
+                       // non-ASCII slow path
+                       return n + RuneCountInString(string(p[n:]))
                }
-               accept := acceptRanges[x>>4]
-               if c := p[i+1]; c < accept.lo || accept.hi < c {
-                       size = 1
-               } else if size == 2 {
-               } else if c := p[i+2]; c < locb || hicb < c {
-                       size = 1
-               } else if size == 3 {
-               } else if c := p[i+3]; c < locb || hicb < c {
-                       size = 1
-               }
-               i += size
        }
        return n
 }