From: Marcel van Lohuizen Date: Thu, 5 Nov 2015 16:52:53 +0000 (+0100) Subject: unicode/utf8: removed uses of ranging over string X-Git-Tag: go1.6beta1~441 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=9b299c1efd902842b788d7dc103512b6b2568ea9;p=gostls13.git unicode/utf8: removed uses of ranging over string Ranging over string is much slower than using DecodeRuneInString. See golang.org/issue/13162. Replacing ranging over a string with the implementation of the Bytes counterpart results in the following performance improvements: RuneCountInStringTenASCIIChars-8 43.0ns ± 1% 16.4ns ± 2% -61.80% (p=0.000 n=7+8) RuneCountInStringTenJapaneseChars-8 161ns ± 2% 154ns ± 2% -4.58% (p=0.000 n=8+8) ValidStringTenASCIIChars-8 52.2ns ± 1% 13.2ns ± 1% -74.62% (p=0.001 n=7+7) ValidStringTenJapaneseChars-8 173ns ± 2% 153ns ± 2% -11.78% (p=0.000 n=7+8) Update golang/go#13162 Change-Id: Ifc40a6a94bb3317f1f2d929d310bd2694645e9f6 Reviewed-on: https://go-review.googlesource.com/16695 Reviewed-by: Russ Cox --- diff --git a/src/unicode/utf8/utf8.go b/src/unicode/utf8/utf8.go index 9ac37184d6..5d29ec080e 100644 --- a/src/unicode/utf8/utf8.go +++ b/src/unicode/utf8/utf8.go @@ -382,10 +382,16 @@ func RuneCount(p []byte) int { // RuneCountInString is like RuneCount but its input is a string. func RuneCountInString(s string) (n int) { - for range s { + for i := 0; i < len(s); { n++ + if s[i] < RuneSelf { + i++ + } else { + _, size := DecodeRuneInString(s[i:]) + i += size + } } - return + return n } // RuneStart reports whether the byte could be the first byte of @@ -415,16 +421,18 @@ func Valid(p []byte) bool { // ValidString reports whether s consists entirely of valid UTF-8-encoded runes. func ValidString(s string) bool { - for i, r := range s { - if r == RuneError { - // The RuneError value can be an error - // sentinel value (if it's size 1) or the same - // value encoded properly. Decode it to see if - // it's the 1 byte sentinel value. + for i := 0; i < len(s); { + if s[i] < RuneSelf { + i++ + } else { _, size := DecodeRuneInString(s[i:]) if size == 1 { + // All valid runes of size 1 (those + // below RuneSelf) were handled above. + // This must be a RuneError. return false } + i += size } } return true