]> Cypherpunks repositories - gostls13.git/commitdiff
bytes, strings: speed up Split{,After}Seq
authorJulien Cretel <jub0bsinthecloud@gmail.com>
Tue, 13 May 2025 11:11:13 +0000 (11:11 +0000)
committerGopher Robot <gobot@golang.org>
Wed, 14 May 2025 18:07:32 +0000 (11:07 -0700)
CL 669735 brought a welcome performance boost to splitSeq; however, it rendered explodeSeq ineligible for inlining and failed to update that function's doc comment.

This CL inlines the call to explodeSeq in splitSeq, thereby unlocking a further speedup in the case of an empty separator, and removes function explodeSeq altogether.

Some benchmarks results:

goos: darwin
goarch: amd64
pkg: strings
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                                   │     old     │                 new                  │
                                   │   sec/op    │    sec/op     vs base                │
SplitSeqEmptySeparator-8             5.136m ± 6%    3.180m ± 6%  -38.09% (p=0.000 n=20)
SplitSeqSingleByteSeparator-8        995.9µ ± 1%    988.4µ ± 0%   -0.75% (p=0.000 n=20)
SplitSeqMultiByteSeparator-8         593.1µ ± 2%    591.7µ ± 1%        ~ (p=0.253 n=20)
SplitAfterSeqEmptySeparator-8        5.554m ± 3%    3.432m ± 2%  -38.20% (p=0.000 n=20)
SplitAfterSeqSingleByteSeparator-8   997.4µ ± 0%   1000.0µ ± 8%        ~ (p=0.121 n=20)
SplitAfterSeqMultiByteSeparator-8    591.7µ ± 1%    588.9µ ± 0%   -0.48% (p=0.004 n=20)
geomean                              1.466m         1.247m       -14.97%

                                   │     old      │                 new                 │
                                   │     B/op     │    B/op     vs base                 │
SplitSeqEmptySeparator-8             0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=20) ¹
SplitSeqSingleByteSeparator-8        0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=20) ¹
SplitSeqMultiByteSeparator-8         0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=20) ¹
SplitAfterSeqEmptySeparator-8        0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=20) ¹
SplitAfterSeqSingleByteSeparator-8   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=20) ¹
SplitAfterSeqMultiByteSeparator-8    0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=20) ¹
geomean                                         ²               +0.00%                ²
¹ all samples are equal
² summaries must be >0 to compute geomean

Change-Id: I5767b68dc1a4fbcb2ac20683830a49ee3eb1bee1
GitHub-Last-Rev: 344934071f3220a1afea3def306dadfee720d311
GitHub-Pull-Request: golang/go#73685
Reviewed-on: https://go-review.googlesource.com/c/go/+/672175
Reviewed-by: qiu laidongfeng2 <2645477756@qq.com>
Reviewed-by: Robert Griesemer <gri@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
src/bytes/iter.go
src/strings/iter.go

index e3af4d2f13927da8bc409a9ca511a9a5acca1d9c..b2abb2c9ba3dc6477fb3c10c37ae9c54da44712c 100644 (file)
@@ -31,23 +31,18 @@ func Lines(s []byte) iter.Seq[[]byte] {
        }
 }
 
-// explodeSeq returns an iterator over the runes in s.
-func explodeSeq(s []byte, yield func([]byte) bool) {
-       for len(s) > 0 {
-               _, size := utf8.DecodeRune(s)
-               if !yield(s[:size:size]) {
-                       return
-               }
-               s = s[size:]
-       }
-}
-
 // splitSeq is SplitSeq or SplitAfterSeq, configured by how many
 // bytes of sep to include in the results (none or all).
 func splitSeq(s, sep []byte, sepSave int) iter.Seq[[]byte] {
        return func(yield func([]byte) bool) {
                if len(sep) == 0 {
-                       explodeSeq(s, yield)
+                       for len(s) > 0 {
+                               _, size := utf8.DecodeRune(s)
+                               if !yield(s[:size:size]) {
+                                       return
+                               }
+                               s = s[size:]
+                       }
                        return
                }
                for {
index a42e78ee09fce1cdc2cad272fad70fccc3e8e538..69fe031739628ce89c47e5ea011083fca1c350da 100644 (file)
@@ -31,23 +31,18 @@ func Lines(s string) iter.Seq[string] {
        }
 }
 
-// explodeSeq returns an iterator over the runes in s.
-func explodeSeq(s string, yield func(string) bool) {
-       for len(s) > 0 {
-               _, size := utf8.DecodeRuneInString(s)
-               if !yield(s[:size]) {
-                       return
-               }
-               s = s[size:]
-       }
-}
-
 // splitSeq is SplitSeq or SplitAfterSeq, configured by how many
 // bytes of sep to include in the results (none or all).
 func splitSeq(s, sep string, sepSave int) iter.Seq[string] {
        return func(yield func(string) bool) {
                if len(sep) == 0 {
-                       explodeSeq(s, yield)
+                       for len(s) > 0 {
+                               _, size := utf8.DecodeRuneInString(s)
+                               if !yield(s[:size]) {
+                                       return
+                               }
+                               s = s[size:]
+                       }
                        return
                }
                for {