]> Cypherpunks repositories - gostls13.git/commitdiff
strings: speed up FieldsFunc
authorMartin Möhrmann <moehrmann@google.com>
Tue, 2 May 2017 08:09:18 +0000 (10:09 +0200)
committerMartin Möhrmann <moehrmann@google.com>
Sun, 13 Aug 2017 09:58:01 +0000 (09:58 +0000)
Increases performance of FieldsFunc by recording the start and end
of the fields in an array. The first 32 fields are saved in a pre-allocated
array on the stack. This avoids the old behavior of iterating over the
input string two times but uses more allocations when more than 32 fields
are encountered.

Additionally code for handling non-ASCII containing strings from Fields is
removed and replaced by a call to the new faster FieldsFunc function.

Overall this still leads to a slowdown for Fields on non-ASCII strings
while speeding up Fields in general.

name                      old time/op    new time/op     delta
Fields/ASCII/16              116ns ± 5%      115ns ± 5%       ~     (p=0.480 n=10+10)
Fields/ASCII/256             765ns ± 1%      761ns ± 2%       ~     (p=0.171 n=10+10)
Fields/ASCII/4096           12.5µs ± 1%     12.7µs ± 1%     +1.82%  (p=0.000 n=10+10)
Fields/ASCII/65536           226µs ± 1%      226µs ± 2%       ~     (p=0.739 n=10+10)
Fields/ASCII/1048576        5.12ms ± 1%     5.12ms ± 1%       ~     (p=0.696 n=8+10)
Fields/Mixed/16              172ns ± 1%      233ns ± 1%    +35.90%  (p=0.000 n=9+10)
Fields/Mixed/256            1.18µs ± 2%     2.45µs ± 1%   +107.47%  (p=0.000 n=10+10)
Fields/Mixed/4096           20.3µs ± 1%     43.1µs ± 2%   +112.41%  (p=0.000 n=10+10)
Fields/Mixed/65536           364µs ± 1%      704µs ± 1%    +93.56%  (p=0.000 n=9+10)
Fields/Mixed/1048576        7.07ms ± 2%    13.34ms ± 4%    +88.83%  (p=0.000 n=10+10)
FieldsFunc/ASCII/16          274ns ± 1%      188ns ± 3%    -31.44%  (p=0.000 n=10+10)
FieldsFunc/ASCII/256        3.69µs ± 1%     2.06µs ± 2%    -44.26%  (p=0.000 n=10+10)
FieldsFunc/ASCII/4096       59.9µs ± 1%     35.3µs ± 2%    -41.10%  (p=0.000 n=10+10)
FieldsFunc/ASCII/65536       958µs ± 1%      567µs ± 1%    -40.82%  (p=0.000 n=10+9)
FieldsFunc/ASCII/1048576    16.3ms ± 2%     11.0ms ± 3%    -32.52%  (p=0.000 n=10+10)
FieldsFunc/Mixed/16          309ns ± 1%      213ns ± 0%    -30.98%  (p=0.000 n=10+6)
FieldsFunc/Mixed/256        3.83µs ± 1%     2.14µs ± 1%    -44.01%  (p=0.000 n=10+10)
FieldsFunc/Mixed/4096       66.2µs ± 2%     37.8µs ± 1%    -42.85%  (p=0.000 n=10+10)
FieldsFunc/Mixed/65536      1.09ms ± 1%     0.63ms ± 1%    -42.73%  (p=0.000 n=10+10)
FieldsFunc/Mixed/1048576    18.6ms ± 3%     12.0ms ± 2%    -35.50%  (p=0.000 n=10+10)

Fixes #17856
Fixes #19789

Change-Id: I9f5a560e534566fd81963651f342c8f44cfb0469
Reviewed-on: https://go-review.googlesource.com/42810
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
src/strings/strings.go

index 52466e924debbda89110e2cd2e2d8e0d4e2ce930..099fb8031bdf1cc2273ced056cd6942ccefb92f6 100644 (file)
@@ -358,66 +358,7 @@ func Fields(s string) []string {
        }
 
        // Some runes in the input string are not ASCII.
-       // Same general approach as in the ASCII path but
-       // uses DecodeRuneInString and unicode.IsSpace if
-       // a non-ASCII rune needs to be decoded and checked
-       // if it corresponds to a space.
-       a := make([]string, 0, n)
-       i := 0
-       // Skip spaces in the front of the input.
-       for i < len(s) {
-               if c := s[i]; c < utf8.RuneSelf {
-                       if asciiSpace[c] == 0 {
-                               break
-                       }
-                       i++
-               } else {
-                       r, w := utf8.DecodeRuneInString(s[i:])
-                       if !unicode.IsSpace(r) {
-                               break
-                       }
-                       i += w
-               }
-       }
-       fieldStart := i
-       for i < len(s) {
-               if c := s[i]; c < utf8.RuneSelf {
-                       if asciiSpace[c] == 0 {
-                               i++
-                               continue
-                       }
-                       a = append(a, s[fieldStart:i])
-                       i++
-               } else {
-                       r, w := utf8.DecodeRuneInString(s[i:])
-                       if !unicode.IsSpace(r) {
-                               i += w
-                               continue
-                       }
-                       a = append(a, s[fieldStart:i])
-                       i += w
-               }
-               // Skip spaces in between fields.
-               for i < len(s) {
-                       if c := s[i]; c < utf8.RuneSelf {
-                               if asciiSpace[c] == 0 {
-                                       break
-                               }
-                               i++
-                       } else {
-                               r, w := utf8.DecodeRuneInString(s[i:])
-                               if !unicode.IsSpace(r) {
-                                       break
-                               }
-                               i += w
-                       }
-               }
-               fieldStart = i
-       }
-       if fieldStart < len(s) { // Last field might end at EOF.
-               a = append(a, s[fieldStart:])
-       }
-       return a
+       return FieldsFunc(s, unicode.IsSpace)
 }
 
 // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c)
@@ -426,35 +367,42 @@ func Fields(s string) []string {
 // FieldsFunc makes no guarantees about the order in which it calls f(c).
 // If f does not return consistent results for a given c, FieldsFunc may crash.
 func FieldsFunc(s string, f func(rune) bool) []string {
-       // First count the fields.
-       n := 0
-       inField := false
-       for _, rune := range s {
-               wasInField := inField
-               inField = !f(rune)
-               if inField && !wasInField {
-                       n++
-               }
+       // A span is used to record a slice of s of the form s[start:end].
+       // The start index is inclusive and the end index is exclusive.
+       type span struct {
+               start int
+               end   int
        }
+       spans := make([]span, 0, 32)
 
-       // Now create them.
-       a := make([]string, n)
-       na := 0
-       fieldStart := -1 // Set to -1 when looking for start of field.
+       // Find the field start and end indices.
+       wasField := false
+       fromIndex := 0
        for i, rune := range s {
                if f(rune) {
-                       if fieldStart >= 0 {
-                               a[na] = s[fieldStart:i]
-                               na++
-                               fieldStart = -1
+                       if wasField {
+                               spans = append(spans, span{start: fromIndex, end: i})
+                               wasField = false
+                       }
+               } else {
+                       if !wasField {
+                               fromIndex = i
+                               wasField = true
                        }
-               } else if fieldStart == -1 {
-                       fieldStart = i
                }
        }
-       if fieldStart >= 0 { // Last field might end at EOF.
-               a[na] = s[fieldStart:]
+
+       // Last field might end at EOF.
+       if wasField {
+               spans = append(spans, span{fromIndex, len(s)})
+       }
+
+       // Create strings from recorded field indices.
+       a := make([]string, len(spans))
+       for i, span := range spans {
+               a[i] = s[span.start:span.end]
        }
+
        return a
 }