]> Cypherpunks repositories - gostls13.git/commitdiff
unicode/utf8: removed uses of ranging over string
authorMarcel van Lohuizen <mpvl@golang.org>
Thu, 5 Nov 2015 16:52:53 +0000 (17:52 +0100)
committerMarcel van Lohuizen <mpvl@golang.org>
Mon, 16 Nov 2015 11:29:13 +0000 (11:29 +0000)
Ranging over string is much slower than using DecodeRuneInString.
See golang.org/issue/13162.

Replacing ranging over a string with the implementation of the Bytes
counterpart results in the following performance improvements:

RuneCountInStringTenASCIIChars-8     43.0ns ± 1%  16.4ns ± 2%  -61.80%  (p=0.000 n=7+8)
RuneCountInStringTenJapaneseChars-8   161ns ± 2%   154ns ± 2%   -4.58%  (p=0.000 n=8+8)
ValidStringTenASCIIChars-8           52.2ns ± 1%  13.2ns ± 1%  -74.62%  (p=0.001 n=7+7)
ValidStringTenJapaneseChars-8         173ns ± 2%   153ns ± 2%  -11.78%  (p=0.000 n=7+8)

Update golang/go#13162

Change-Id: Ifc40a6a94bb3317f1f2d929d310bd2694645e9f6
Reviewed-on: https://go-review.googlesource.com/16695
Reviewed-by: Russ Cox <rsc@golang.org>
src/unicode/utf8/utf8.go

index 9ac37184d69aa4fd308db2280cfc1b343a779c7f..5d29ec080e789c9b407bd2fd7b499e93a20232d9 100644 (file)
@@ -382,10 +382,16 @@ func RuneCount(p []byte) int {
 
 // RuneCountInString is like RuneCount but its input is a string.
 func RuneCountInString(s string) (n int) {
-       for range s {
+       for i := 0; i < len(s); {
                n++
+               if s[i] < RuneSelf {
+                       i++
+               } else {
+                       _, size := DecodeRuneInString(s[i:])
+                       i += size
+               }
        }
-       return
+       return n
 }
 
 // RuneStart reports whether the byte could be the first byte of
@@ -415,16 +421,18 @@ func Valid(p []byte) bool {
 
 // ValidString reports whether s consists entirely of valid UTF-8-encoded runes.
 func ValidString(s string) bool {
-       for i, r := range s {
-               if r == RuneError {
-                       // The RuneError value can be an error
-                       // sentinel value (if it's size 1) or the same
-                       // value encoded properly. Decode it to see if
-                       // it's the 1 byte sentinel value.
+       for i := 0; i < len(s); {
+               if s[i] < RuneSelf {
+                       i++
+               } else {
                        _, size := DecodeRuneInString(s[i:])
                        if size == 1 {
+                               // All valid runes of size 1 (those
+                               // below RuneSelf) were handled above.
+                               // This must be a RuneError.
                                return false
                        }
+                       i += size
                }
        }
        return true