From: Cuong Manh Le Date: Fri, 13 Sep 2024 03:15:51 +0000 (+0700) Subject: unicode/utf8: speedup RuneCount X-Git-Tag: go1.24rc1~898 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=3d33437c450aa74014ea1d41cd986b6ee6266984;p=gostls13.git unicode/utf8: speedup RuneCount CL 612617 sped up RuneCountInString, so we can now use it to speed up RuneCount, too. name old time/op new time/op delta RuneCountTenASCIIChars-8 8.69ns ± 1% 3.59ns ± 2% -58.66% (p=0.000 n=9+9) RuneCountTenJapaneseChars-8 49.8ns ± 2% 40.9ns ± 0% -17.94% (p=0.000 n=10+8) Change-Id: I311750c00efc79af35fb0ca3b482a5d94e0a7977 Reviewed-on: https://go-review.googlesource.com/c/go/+/612955 Reviewed-by: Dmitri Shuralyov Auto-Submit: Cuong Manh Le Reviewed-by: Tim King LUCI-TryBot-Result: Go LUCI --- diff --git a/src/unicode/utf8/utf8.go b/src/unicode/utf8/utf8.go index 9743b74258..180c008ed5 100644 --- a/src/unicode/utf8/utf8.go +++ b/src/unicode/utf8/utf8.go @@ -414,35 +414,11 @@ func appendRuneNonASCII(p []byte, r rune) []byte { func RuneCount(p []byte) int { np := len(p) var n int - for i := 0; i < np; { - n++ - c := p[i] - if c < RuneSelf { - // ASCII fast path - i++ - continue - } - x := first[c] - if x == xx { - i++ // invalid. - continue - } - size := int(x & 7) - if i+size > np { - i++ // Short or invalid. - continue + for ; n < np; n++ { + if c := p[n]; c >= RuneSelf { + // non-ASCII slow path + return n + RuneCountInString(string(p[n:])) } - accept := acceptRanges[x>>4] - if c := p[i+1]; c < accept.lo || accept.hi < c { - size = 1 - } else if size == 2 { - } else if c := p[i+2]; c < locb || hicb < c { - size = 1 - } else if size == 3 { - } else if c := p[i+3]; c < locb || hicb < c { - size = 1 - } - i += size } return n }