From: Martin Möhrmann Date: Tue, 2 May 2017 08:09:18 +0000 (+0200) Subject: strings: speed up FieldsFunc X-Git-Tag: go1.10beta1~1628 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=fee7f2ab1652d510817a64459ffc0b458decc9ce;p=gostls13.git strings: speed up FieldsFunc Increases performance of FieldsFunc by recording the start and end of the fields in an array. The first 32 fields are saved in a pre-allocated array on the stack. This avoids the old behavior of iterating over the input string two times but uses more allocations when more than 32 fields are encountered. Additionally code for handling non-ASCII containing strings from Fields is removed and replaced by a call to the new faster FieldsFunc function. Overall this still leads to a slowdown for Fields on non-ASCII strings while speeding up Fields in general. name old time/op new time/op delta Fields/ASCII/16 116ns ± 5% 115ns ± 5% ~ (p=0.480 n=10+10) Fields/ASCII/256 765ns ± 1% 761ns ± 2% ~ (p=0.171 n=10+10) Fields/ASCII/4096 12.5µs ± 1% 12.7µs ± 1% +1.82% (p=0.000 n=10+10) Fields/ASCII/65536 226µs ± 1% 226µs ± 2% ~ (p=0.739 n=10+10) Fields/ASCII/1048576 5.12ms ± 1% 5.12ms ± 1% ~ (p=0.696 n=8+10) Fields/Mixed/16 172ns ± 1% 233ns ± 1% +35.90% (p=0.000 n=9+10) Fields/Mixed/256 1.18µs ± 2% 2.45µs ± 1% +107.47% (p=0.000 n=10+10) Fields/Mixed/4096 20.3µs ± 1% 43.1µs ± 2% +112.41% (p=0.000 n=10+10) Fields/Mixed/65536 364µs ± 1% 704µs ± 1% +93.56% (p=0.000 n=9+10) Fields/Mixed/1048576 7.07ms ± 2% 13.34ms ± 4% +88.83% (p=0.000 n=10+10) FieldsFunc/ASCII/16 274ns ± 1% 188ns ± 3% -31.44% (p=0.000 n=10+10) FieldsFunc/ASCII/256 3.69µs ± 1% 2.06µs ± 2% -44.26% (p=0.000 n=10+10) FieldsFunc/ASCII/4096 59.9µs ± 1% 35.3µs ± 2% -41.10% (p=0.000 n=10+10) FieldsFunc/ASCII/65536 958µs ± 1% 567µs ± 1% -40.82% (p=0.000 n=10+9) FieldsFunc/ASCII/1048576 16.3ms ± 2% 11.0ms ± 3% -32.52% (p=0.000 n=10+10) FieldsFunc/Mixed/16 309ns ± 1% 213ns ± 0% -30.98% (p=0.000 n=10+6) FieldsFunc/Mixed/256 3.83µs ± 1% 2.14µs ± 1% -44.01% (p=0.000 n=10+10) FieldsFunc/Mixed/4096 66.2µs ± 2% 37.8µs ± 1% -42.85% (p=0.000 n=10+10) FieldsFunc/Mixed/65536 1.09ms ± 1% 0.63ms ± 1% -42.73% (p=0.000 n=10+10) FieldsFunc/Mixed/1048576 18.6ms ± 3% 12.0ms ± 2% -35.50% (p=0.000 n=10+10) Fixes #17856 Fixes #19789 Change-Id: I9f5a560e534566fd81963651f342c8f44cfb0469 Reviewed-on: https://go-review.googlesource.com/42810 Reviewed-by: Joe Tsai --- diff --git a/src/strings/strings.go b/src/strings/strings.go index 52466e924d..099fb8031b 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -358,66 +358,7 @@ func Fields(s string) []string { } // Some runes in the input string are not ASCII. - // Same general approach as in the ASCII path but - // uses DecodeRuneInString and unicode.IsSpace if - // a non-ASCII rune needs to be decoded and checked - // if it corresponds to a space. - a := make([]string, 0, n) - i := 0 - // Skip spaces in the front of the input. - for i < len(s) { - if c := s[i]; c < utf8.RuneSelf { - if asciiSpace[c] == 0 { - break - } - i++ - } else { - r, w := utf8.DecodeRuneInString(s[i:]) - if !unicode.IsSpace(r) { - break - } - i += w - } - } - fieldStart := i - for i < len(s) { - if c := s[i]; c < utf8.RuneSelf { - if asciiSpace[c] == 0 { - i++ - continue - } - a = append(a, s[fieldStart:i]) - i++ - } else { - r, w := utf8.DecodeRuneInString(s[i:]) - if !unicode.IsSpace(r) { - i += w - continue - } - a = append(a, s[fieldStart:i]) - i += w - } - // Skip spaces in between fields. - for i < len(s) { - if c := s[i]; c < utf8.RuneSelf { - if asciiSpace[c] == 0 { - break - } - i++ - } else { - r, w := utf8.DecodeRuneInString(s[i:]) - if !unicode.IsSpace(r) { - break - } - i += w - } - } - fieldStart = i - } - if fieldStart < len(s) { // Last field might end at EOF. - a = append(a, s[fieldStart:]) - } - return a + return FieldsFunc(s, unicode.IsSpace) } // FieldsFunc splits the string s at each run of Unicode code points c satisfying f(c) @@ -426,35 +367,42 @@ func Fields(s string) []string { // FieldsFunc makes no guarantees about the order in which it calls f(c). // If f does not return consistent results for a given c, FieldsFunc may crash. func FieldsFunc(s string, f func(rune) bool) []string { - // First count the fields. - n := 0 - inField := false - for _, rune := range s { - wasInField := inField - inField = !f(rune) - if inField && !wasInField { - n++ - } + // A span is used to record a slice of s of the form s[start:end]. + // The start index is inclusive and the end index is exclusive. + type span struct { + start int + end int } + spans := make([]span, 0, 32) - // Now create them. - a := make([]string, n) - na := 0 - fieldStart := -1 // Set to -1 when looking for start of field. + // Find the field start and end indices. + wasField := false + fromIndex := 0 for i, rune := range s { if f(rune) { - if fieldStart >= 0 { - a[na] = s[fieldStart:i] - na++ - fieldStart = -1 + if wasField { + spans = append(spans, span{start: fromIndex, end: i}) + wasField = false + } + } else { + if !wasField { + fromIndex = i + wasField = true } - } else if fieldStart == -1 { - fieldStart = i } } - if fieldStart >= 0 { // Last field might end at EOF. - a[na] = s[fieldStart:] + + // Last field might end at EOF. + if wasField { + spans = append(spans, span{fromIndex, len(s)}) + } + + // Create strings from recorded field indices. + a := make([]string, len(spans)) + for i, span := range spans { + a[i] = s[span.start:span.end] } + return a }