]> Cypherpunks repositories - gostls13.git/commitdiff
bytes: speed up Fields and FieldsFunc
authorMartin Möhrmann <moehrmann@google.com>
Sat, 12 Aug 2017 08:20:30 +0000 (10:20 +0200)
committerMartin Möhrmann <moehrmann@google.com>
Mon, 14 Aug 2017 21:08:51 +0000 (21:08 +0000)
Applies the optimizations from golang.org/cl/42810 and golang.org/cl/37959
done to the strings package to the bytes package.

name                      old time/op    new time/op     delta
Fields/ASCII/16              417ns ± 4%      118ns ± 3%    -71.65%  (p=0.000 n=10+10)
Fields/ASCII/256            5.95µs ± 3%     0.88µs ± 0%    -85.23%  (p=0.000 n=10+7)
Fields/ASCII/4096           92.3µs ± 1%     12.8µs ± 2%    -86.13%  (p=0.000 n=10+10)
Fields/ASCII/65536          1.49ms ± 1%     0.25ms ± 1%    -83.14%  (p=0.000 n=10+10)
Fields/ASCII/1048576        25.0ms ± 1%      6.5ms ± 2%    -74.04%  (p=0.000 n=10+10)
Fields/Mixed/16              406ns ± 1%      222ns ± 1%    -45.24%  (p=0.000 n=10+9)
Fields/Mixed/256            5.78µs ± 1%     2.27µs ± 1%    -60.73%  (p=0.000 n=9+10)
Fields/Mixed/4096           97.9µs ± 1%     40.5µs ± 3%    -58.66%  (p=0.000 n=10+10)
Fields/Mixed/65536          1.58ms ± 1%     0.69ms ± 1%    -56.58%  (p=0.000 n=10+10)
Fields/Mixed/1048576        26.6ms ± 1%     12.6ms ± 2%    -52.44%  (p=0.000 n=9+10)
FieldsFunc/ASCII/16          395ns ± 1%      188ns ± 1%    -52.34%  (p=0.000 n=10+10)
FieldsFunc/ASCII/256        5.90µs ± 1%     2.00µs ± 1%    -66.06%  (p=0.000 n=10+10)
FieldsFunc/ASCII/4096       92.5µs ± 1%     33.0µs ± 1%    -64.34%  (p=0.000 n=10+9)
FieldsFunc/ASCII/65536      1.48ms ± 1%     0.54ms ± 1%    -63.38%  (p=0.000 n=10+9)
FieldsFunc/ASCII/1048576    25.1ms ± 1%     10.5ms ± 3%    -58.24%  (p=0.000 n=10+10)
FieldsFunc/Mixed/16          401ns ± 1%      205ns ± 2%    -48.87%  (p=0.000 n=10+10)
FieldsFunc/Mixed/256        5.70µs ± 1%     1.98µs ± 1%    -65.28%  (p=0.000 n=10+10)
FieldsFunc/Mixed/4096       97.5µs ± 1%     35.4µs ± 1%    -63.65%  (p=0.000 n=10+10)
FieldsFunc/Mixed/65536      1.57ms ± 1%     0.61ms ± 1%    -61.20%  (p=0.000 n=10+10)
FieldsFunc/Mixed/1048576    26.5ms ± 1%     11.4ms ± 2%    -56.84%  (p=0.000 n=10+10)

name                      old speed      new speed       delta
Fields/ASCII/16           38.4MB/s ± 4%  134.9MB/s ± 3%   +251.55%  (p=0.000 n=10+10)
Fields/ASCII/256          43.0MB/s ± 3%  290.6MB/s ± 1%   +575.97%  (p=0.000 n=10+8)
Fields/ASCII/4096         44.4MB/s ± 1%  320.0MB/s ± 2%   +620.90%  (p=0.000 n=10+10)
Fields/ASCII/65536        44.0MB/s ± 1%  260.7MB/s ± 1%   +493.15%  (p=0.000 n=10+10)
Fields/ASCII/1048576      42.0MB/s ± 1%  161.6MB/s ± 2%   +285.21%  (p=0.000 n=10+10)
Fields/Mixed/16           39.4MB/s ± 1%   71.7MB/s ± 1%    +82.20%  (p=0.000 n=10+10)
Fields/Mixed/256          44.3MB/s ± 1%  112.8MB/s ± 1%   +154.64%  (p=0.000 n=9+10)
Fields/Mixed/4096         41.9MB/s ± 1%  101.2MB/s ± 3%   +141.92%  (p=0.000 n=10+10)
Fields/Mixed/65536        41.5MB/s ± 1%   95.5MB/s ± 1%   +130.29%  (p=0.000 n=10+10)
Fields/Mixed/1048576      39.4MB/s ± 1%   82.9MB/s ± 2%   +110.28%  (p=0.000 n=9+10)
FieldsFunc/ASCII/16       40.5MB/s ± 1%   84.9MB/s ± 2%   +109.80%  (p=0.000 n=10+10)
FieldsFunc/ASCII/256      43.4MB/s ± 1%  127.9MB/s ± 1%   +194.58%  (p=0.000 n=10+10)
FieldsFunc/ASCII/4096     44.3MB/s ± 1%  124.2MB/s ± 1%   +180.44%  (p=0.000 n=10+9)
FieldsFunc/ASCII/65536    44.2MB/s ± 1%  120.6MB/s ± 1%   +173.06%  (p=0.000 n=10+9)
FieldsFunc/ASCII/1048576  41.8MB/s ± 1%  100.2MB/s ± 3%   +139.53%  (p=0.000 n=10+10)
FieldsFunc/Mixed/16       39.8MB/s ± 1%   77.8MB/s ± 2%    +95.46%  (p=0.000 n=10+10)
FieldsFunc/Mixed/256      44.9MB/s ± 1%  129.4MB/s ± 1%   +187.97%  (p=0.000 n=10+10)
FieldsFunc/Mixed/4096     42.0MB/s ± 1%  115.6MB/s ± 1%   +175.08%  (p=0.000 n=10+10)
FieldsFunc/Mixed/65536    41.6MB/s ± 1%  107.3MB/s ± 1%   +157.75%  (p=0.000 n=10+10)
FieldsFunc/Mixed/1048576  39.6MB/s ± 1%   91.8MB/s ± 2%   +131.72%  (p=0.000 n=10+10)

name                      old alloc/op   new alloc/op    delta
Fields/ASCII/16              80.0B ± 0%      80.0B ± 0%       ~     (all equal)
Fields/ASCII/256              768B ± 0%       768B ± 0%       ~     (all equal)
Fields/ASCII/4096           9.47kB ± 0%     9.47kB ± 0%       ~     (all equal)
Fields/ASCII/65536           147kB ± 0%      147kB ± 0%       ~     (all equal)
Fields/ASCII/1048576        2.27MB ± 0%     2.27MB ± 0%       ~     (all equal)
Fields/Mixed/16              96.0B ± 0%      96.0B ± 0%       ~     (all equal)
Fields/Mixed/256              768B ± 0%       768B ± 0%       ~     (all equal)
Fields/Mixed/4096           9.47kB ± 0%    24.83kB ± 0%   +162.16%  (p=0.000 n=10+10)
Fields/Mixed/65536           147kB ± 0%      497kB ± 0%   +237.24%  (p=0.000 n=10+10)
Fields/Mixed/1048576        2.26MB ± 0%     9.61MB ± 0%   +324.89%  (p=0.000 n=10+10)
FieldsFunc/ASCII/16          80.0B ± 0%      80.0B ± 0%       ~     (all equal)
FieldsFunc/ASCII/256          768B ± 0%       768B ± 0%       ~     (all equal)
FieldsFunc/ASCII/4096       9.47kB ± 0%    24.83kB ± 0%   +162.16%  (p=0.000 n=10+10)
FieldsFunc/ASCII/65536       147kB ± 0%      497kB ± 0%   +237.24%  (p=0.000 n=10+10)
FieldsFunc/ASCII/1048576    2.27MB ± 0%     9.61MB ± 0%   +323.72%  (p=0.000 n=10+10)
FieldsFunc/Mixed/16          96.0B ± 0%      96.0B ± 0%       ~     (all equal)
FieldsFunc/Mixed/256          768B ± 0%       768B ± 0%       ~     (all equal)
FieldsFunc/Mixed/4096       9.47kB ± 0%    24.83kB ± 0%   +162.16%  (p=0.000 n=10+10)
FieldsFunc/Mixed/65536       147kB ± 0%      497kB ± 0%   +237.24%  (p=0.000 n=10+10)
FieldsFunc/Mixed/1048576    2.26MB ± 0%     9.61MB ± 0%   +324.89%  (p=0.000 n=10+10)

name                      old allocs/op  new allocs/op   delta
Fields/ASCII/16               1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/ASCII/256              1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/ASCII/4096             1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/ASCII/65536            1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/ASCII/1048576          1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/Mixed/16               1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/Mixed/256              1.00 ± 0%       1.00 ± 0%       ~     (all equal)
Fields/Mixed/4096             1.00 ± 0%       5.00 ± 0%   +400.00%  (p=0.000 n=10+10)
Fields/Mixed/65536            1.00 ± 0%      12.00 ± 0%  +1100.00%  (p=0.000 n=10+10)
Fields/Mixed/1048576          1.00 ± 0%      24.00 ± 0%  +2300.00%  (p=0.000 n=10+10)
FieldsFunc/ASCII/16           1.00 ± 0%       1.00 ± 0%       ~     (all equal)
FieldsFunc/ASCII/256          1.00 ± 0%       1.00 ± 0%       ~     (all equal)
FieldsFunc/ASCII/4096         1.00 ± 0%       5.00 ± 0%   +400.00%  (p=0.000 n=10+10)
FieldsFunc/ASCII/65536        1.00 ± 0%      12.00 ± 0%  +1100.00%  (p=0.000 n=10+10)
FieldsFunc/ASCII/1048576      1.00 ± 0%      24.00 ± 0%  +2300.00%  (p=0.000 n=10+10)
FieldsFunc/Mixed/16           1.00 ± 0%       1.00 ± 0%       ~     (all equal)
FieldsFunc/Mixed/256          1.00 ± 0%       1.00 ± 0%       ~     (all equal)
FieldsFunc/Mixed/4096         1.00 ± 0%       5.00 ± 0%   +400.00%  (p=0.000 n=10+10)
FieldsFunc/Mixed/65536        1.00 ± 0%      12.00 ± 0%  +1100.00%  (p=0.000 n=10+10)
FieldsFunc/Mixed/1048576      1.00 ± 0%      24.00 ± 0%  +2300.00%  (p=0.000 n=10+10)

Change-Id: If1926782decc2f60d3b4b8c41c2ce7d8bdedfd8f
Reviewed-on: https://go-review.googlesource.com/55131
Run-TryBot: Martin Möhrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
src/bytes/bytes.go
src/bytes/bytes_test.go

index 7c878af688cc71462ab4e8bcbfa7cdaf6256f7b2..457e1494104833dd8cb5ad64dc6b225c97480e26 100644 (file)
@@ -265,9 +265,57 @@ func SplitAfter(s, sep []byte) [][]byte {
        return genSplit(s, sep, len(sep), -1)
 }
 
+var asciiSpace = [256]uint8{'\t': 1, '\n': 1, '\v': 1, '\f': 1, '\r': 1, ' ': 1}
+
 // Fields splits the slice s around each instance of one or more consecutive white space
-// characters, returning a slice of subslices of s or an empty list if s contains only white space.
+// characters, as defined by unicode.IsSpace, returning a slice of subslices of s or an
+// empty slice if s contains only white space.
 func Fields(s []byte) [][]byte {
+       // First count the fields.
+       // This is an exact count if s is ASCII, otherwise it is an approximation.
+       n := 0
+       wasSpace := 1
+       // setBits is used to track which bits are set in the bytes of s.
+       setBits := uint8(0)
+       for i := 0; i < len(s); i++ {
+               r := s[i]
+               setBits |= r
+               isSpace := int(asciiSpace[r])
+               n += wasSpace & ^isSpace
+               wasSpace = isSpace
+       }
+
+       if setBits < utf8.RuneSelf { // ASCII fast path
+               a := make([][]byte, n)
+               na := 0
+               fieldStart := 0
+               i := 0
+               // Skip spaces in the front of the input.
+               for i < len(s) && asciiSpace[s[i]] != 0 {
+                       i++
+               }
+               fieldStart = i
+               for i < len(s) {
+                       if asciiSpace[s[i]] == 0 {
+                               i++
+                               continue
+                       }
+                       a[na] = s[fieldStart:i]
+                       na++
+                       i++
+                       // Skip spaces in between fields.
+                       for i < len(s) && asciiSpace[s[i]] != 0 {
+                               i++
+                       }
+                       fieldStart = i
+               }
+               if fieldStart < len(s) { // Last field might end at EOF.
+                       a[na] = s[fieldStart:]
+               }
+               return a
+       }
+
+       // Some runes in the input slice are not ASCII.
        return FieldsFunc(s, unicode.IsSpace)
 }
 
@@ -278,39 +326,49 @@ func Fields(s []byte) [][]byte {
 // FieldsFunc makes no guarantees about the order in which it calls f(c).
 // If f does not return consistent results for a given c, FieldsFunc may crash.
 func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
-       n := 0
-       inField := false
-       for i := 0; i < len(s); {
-               r, size := utf8.DecodeRune(s[i:])
-               wasInField := inField
-               inField = !f(r)
-               if inField && !wasInField {
-                       n++
-               }
-               i += size
+       // A span is used to record a slice of s of the form s[start:end].
+       // The start index is inclusive and the end index is exclusive.
+       type span struct {
+               start int
+               end   int
        }
+       spans := make([]span, 0, 32)
 
-       a := make([][]byte, n)
-       na := 0
-       fieldStart := -1
-       for i := 0; i <= len(s) && na < n; {
-               r, size := utf8.DecodeRune(s[i:])
-               if fieldStart < 0 && size > 0 && !f(r) {
-                       fieldStart = i
-                       i += size
-                       continue
-               }
-               if fieldStart >= 0 && (size == 0 || f(r)) {
-                       a[na] = s[fieldStart:i]
-                       na++
-                       fieldStart = -1
+       // Find the field start and end indices.
+       wasField := false
+       fromIndex := 0
+       for i := 0; i < len(s); {
+               size := 1
+               r := rune(s[i])
+               if r >= utf8.RuneSelf {
+                       r, size = utf8.DecodeRune(s[i:])
                }
-               if size == 0 {
-                       break
+               if f(r) {
+                       if wasField {
+                               spans = append(spans, span{start: fromIndex, end: i})
+                               wasField = false
+                       }
+               } else {
+                       if !wasField {
+                               fromIndex = i
+                               wasField = true
+                       }
                }
                i += size
        }
-       return a[0:na]
+
+       // Last field might end at EOF.
+       if wasField {
+               spans = append(spans, span{fromIndex, len(s)})
+       }
+
+       // Create subslices from recorded field indices.
+       a := make([][]byte, len(spans))
+       for i, span := range spans {
+               a[i] = s[span.start:span.end]
+       }
+
+       return a
 }
 
 // Join concatenates the elements of s to create a new byte slice. The separator
index ca0cdbb7c9f02bc78a092cf3cb14e89cb93870cd..db28497e39e4d16a713c3daa7982694759c20558 100644 (file)
@@ -1502,19 +1502,58 @@ var makeFieldsInput = func() []byte {
        return x
 }
 
-var fieldsInput = makeFieldsInput()
+var makeFieldsInputASCII = func() []byte {
+       x := make([]byte, 1<<20)
+       // Input is ~10% space, rest ASCII non-space.
+       for i := range x {
+               if rand.Intn(10) == 0 {
+                       x[i] = ' '
+               } else {
+                       x[i] = 'x'
+               }
+       }
+       return x
+}
+
+var bytesdata = []struct {
+       name string
+       data []byte
+}{
+       {"ASCII", makeFieldsInputASCII()},
+       {"Mixed", makeFieldsInput()},
+}
 
 func BenchmarkFields(b *testing.B) {
-       b.SetBytes(int64(len(fieldsInput)))
-       for i := 0; i < b.N; i++ {
-               Fields(fieldsInput)
+       for _, sd := range bytesdata {
+               b.Run(sd.name, func(b *testing.B) {
+                       for j := 1 << 4; j <= 1<<20; j <<= 4 {
+                               b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
+                                       b.ReportAllocs()
+                                       b.SetBytes(int64(j))
+                                       data := sd.data[:j]
+                                       for i := 0; i < b.N; i++ {
+                                               Fields(data)
+                                       }
+                               })
+                       }
+               })
        }
 }
 
 func BenchmarkFieldsFunc(b *testing.B) {
-       b.SetBytes(int64(len(fieldsInput)))
-       for i := 0; i < b.N; i++ {
-               FieldsFunc(fieldsInput, unicode.IsSpace)
+       for _, sd := range bytesdata {
+               b.Run(sd.name, func(b *testing.B) {
+                       for j := 1 << 4; j <= 1<<20; j <<= 4 {
+                               b.Run(fmt.Sprintf("%d", j), func(b *testing.B) {
+                                       b.ReportAllocs()
+                                       b.SetBytes(int64(j))
+                                       data := sd.data[:j]
+                                       for i := 0; i < b.N; i++ {
+                                               FieldsFunc(data, unicode.IsSpace)
+                                       }
+                               })
+                       }
+               })
        }
 }