From 751fbf9e5a73991fad24a48f1f523e446eb78291 Mon Sep 17 00:00:00 2001 From: Cuong Manh Le Date: Fri, 13 Sep 2024 00:48:11 +0700 Subject: [PATCH] unicode/utf8: use range loop in RuneCountInString MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit CL 28490 speeded up non-ASCII rune decoding, and ASCII rune is also decoded faster now. Benchmark using: perflock -governor 70% go test -run=NONE -bench=BenchmarkRuneCountInString -count=10 Result: name old time/op new time/op delta RuneCountInStringTenASCIIChars-8 10.2ns ± 0% 7.1ns ± 1% -30.53% (p=0.000 n=8+9) RuneCountInStringTenJapaneseChars-8 49.3ns ± 2% 38.5ns ± 2% -21.84% (p=0.000 n=8+8) Fixes #13162 Change-Id: Ifb01f3799c5c93e7f7c7af13a95becfde85ae807 Reviewed-on: https://go-review.googlesource.com/c/go/+/612617 Reviewed-by: Tim King Reviewed-by: Ian Lance Taylor Auto-Submit: Cuong Manh Le LUCI-TryBot-Result: Go LUCI Auto-Submit: Tim King --- src/unicode/utf8/utf8.go | 31 ++----------------------------- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/src/unicode/utf8/utf8.go b/src/unicode/utf8/utf8.go index 1c1391d55b..9743b74258 100644 --- a/src/unicode/utf8/utf8.go +++ b/src/unicode/utf8/utf8.go @@ -449,35 +449,8 @@ func RuneCount(p []byte) int { // RuneCountInString is like [RuneCount] but its input is a string. func RuneCountInString(s string) (n int) { - ns := len(s) - for i := 0; i < ns; n++ { - c := s[i] - if c < RuneSelf { - // ASCII fast path - i++ - continue - } - x := first[c] - if x == xx { - i++ // invalid. - continue - } - size := int(x & 7) - if i+size > ns { - i++ // Short or invalid. - continue - } - accept := acceptRanges[x>>4] - if c := s[i+1]; c < accept.lo || accept.hi < c { - size = 1 - } else if size == 2 { - } else if c := s[i+2]; c < locb || hicb < c { - size = 1 - } else if size == 3 { - } else if c := s[i+3]; c < locb || hicb < c { - size = 1 - } - i += size + for range s { + n++ } return n } -- 2.48.1