]> Cypherpunks repositories - gostls13.git/commitdiff
unicode/utf8: use range loop in RuneCountInString
authorCuong Manh Le <cuong.manhle.vn@gmail.com>
Thu, 12 Sep 2024 17:48:11 +0000 (00:48 +0700)
committerGopher Robot <gobot@golang.org>
Thu, 12 Sep 2024 19:10:46 +0000 (19:10 +0000)
CL 28490 speeded up non-ASCII rune decoding, and ASCII rune is also
decoded faster now.

Benchmark using:

    perflock -governor 70% go test -run=NONE -bench=BenchmarkRuneCountInString -count=10

Result:

name                                 old time/op  new time/op  delta
RuneCountInStringTenASCIIChars-8     10.2ns ± 0%   7.1ns ± 1%  -30.53%  (p=0.000 n=8+9)
RuneCountInStringTenJapaneseChars-8  49.3ns ± 2%  38.5ns ± 2%  -21.84%  (p=0.000 n=8+8)

Fixes #13162

Change-Id: Ifb01f3799c5c93e7f7c7af13a95becfde85ae807
Reviewed-on: https://go-review.googlesource.com/c/go/+/612617
Reviewed-by: Tim King <taking@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Auto-Submit: Cuong Manh Le <cuong.manhle.vn@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Tim King <taking@google.com>

src/unicode/utf8/utf8.go

index 1c1391d55b53b735989465c8ba9b51469a65e3b3..9743b742580c907f24eb6d2d8be0e9ac172387c0 100644 (file)
@@ -449,35 +449,8 @@ func RuneCount(p []byte) int {
 
 // RuneCountInString is like [RuneCount] but its input is a string.
 func RuneCountInString(s string) (n int) {
-       ns := len(s)
-       for i := 0; i < ns; n++ {
-               c := s[i]
-               if c < RuneSelf {
-                       // ASCII fast path
-                       i++
-                       continue
-               }
-               x := first[c]
-               if x == xx {
-                       i++ // invalid.
-                       continue
-               }
-               size := int(x & 7)
-               if i+size > ns {
-                       i++ // Short or invalid.
-                       continue
-               }
-               accept := acceptRanges[x>>4]
-               if c := s[i+1]; c < accept.lo || accept.hi < c {
-                       size = 1
-               } else if size == 2 {
-               } else if c := s[i+2]; c < locb || hicb < c {
-                       size = 1
-               } else if size == 3 {
-               } else if c := s[i+3]; c < locb || hicb < c {
-                       size = 1
-               }
-               i += size
+       for range s {
+               n++
        }
        return n
 }