Cypherpunks repositories - gostls13.git/commitdiff
bytes, strings: reduce Split{,After}Seq heap allocations
author: tdakkota <tanc13ya.ru@gmail.com>
Sat, 3 May 2025 16:45:02 +0000 (19:45 +0300)
committer: Gopher Robot <gobot@golang.org>
Tue, 6 May 2025 02:08:23 +0000 (19:08 -0700)
This CL slightly changes the flow of splitSeq to help the compiler inline the iterator closure.

goos: linux
goarch: amd64
pkg: strings
cpu: AMD Ryzen 9 5950X 16-Core Processor
                                    │   sec/op    │   sec/op     vs base                │
SplitSeqEmptySeparator-32             3.590m ± 0%   3.430m ± 2%   -4.46% (p=0.000 n=30)
SplitSeqSingleByteSeparator-32        647.0µ ± 0%   656.1µ ± 0%   +1.41% (p=0.000 n=30)
SplitSeqMultiByteSeparator-32         423.9µ ± 1%   384.5µ ± 0%   -9.31% (p=0.000 n=30)
SplitAfterSeqEmptySeparator-32        3.372m ± 4%   3.514m ± 0%   +4.20% (p=0.000 n=30)
SplitAfterSeqSingleByteSeparator-32   648.5µ ± 2%   537.6µ ± 0%  -17.10% (p=0.000 n=30)
SplitAfterSeqMultiByteSeparator-32    423.3µ ± 2%   364.4µ ± 2%  -13.91% (p=0.000 n=30)
geomean                               984.7µ        917.3µ        -6.85%

                                    │    B/op    │   B/op     vs base                     │
SplitSeqEmptySeparator-32             24.00 ± 0%   0.00 ± 0%  -100.00% (p=0.000 n=30)
SplitSeqSingleByteSeparator-32        24.00 ± 0%   0.00 ± 0%  -100.00% (p=0.000 n=30)
SplitSeqMultiByteSeparator-32         24.00 ± 0%   0.00 ± 0%  -100.00% (p=0.000 n=30)
SplitAfterSeqEmptySeparator-32        24.00 ± 0%   0.00 ± 0%  -100.00% (p=0.000 n=30)
SplitAfterSeqSingleByteSeparator-32   24.00 ± 0%   0.00 ± 0%  -100.00% (p=0.000 n=30)
SplitAfterSeqMultiByteSeparator-32    24.00 ± 0%   0.00 ± 0%  -100.00% (p=0.000 n=30)
geomean                               24.00                   ?

For #73524

Change-Id: Ic83c5751a41c65030356a208e4ad1f500723e695
Reviewed-on: https://go-review.googlesource.com/c/go/+/669735
Auto-Submit: Alan Donovan <adonovan@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Alan Donovan <adonovan@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: qiu laidongfeng2 <2645477756@qq.com>
Commit-Queue: Alan Donovan <adonovan@google.com>

src/bytes/iter.go
src/bytes/iter_test.go [new file with mode: 0644]
src/strings/iter.go
src/strings/iter_test.go [new file with mode: 0644]

diff --git a/src/bytes/iter.go b/src/bytes/iter.go
index 8e9ee8b98d88dbbba78c33e1ead55cc1a291c687..e3af4d2f13927da8bc409a9ca511a9a5acca1d9c 100644 (file)
@@ -32,25 +32,24 @@ func Lines(s []byte) iter.Seq[[]byte] {
 }
 
 // explodeSeq returns an iterator over the runes in s.
-func explodeSeq(s []byte) iter.Seq[[]byte] {
-       return func(yield func([]byte) bool) {
-               for len(s) > 0 {
-                       _, size := utf8.DecodeRune(s)
-                       if !yield(s[:size:size]) {
-                               return
-                       }
-                       s = s[size:]
+func explodeSeq(s []byte, yield func([]byte) bool) {
+       for len(s) > 0 {
+               _, size := utf8.DecodeRune(s)
+               if !yield(s[:size:size]) {
+                       return
                }
+               s = s[size:]
        }
 }
 
 // splitSeq is SplitSeq or SplitAfterSeq, configured by how many
 // bytes of sep to include in the results (none or all).
 func splitSeq(s, sep []byte, sepSave int) iter.Seq[[]byte] {
-       if len(sep) == 0 {
-               return explodeSeq(s)
-       }
        return func(yield func([]byte) bool) {
+               if len(sep) == 0 {
+                       explodeSeq(s, yield)
+                       return
+               }
                for {
                        i := Index(s, sep)
                        if i < 0 {
diff --git a/src/bytes/iter_test.go b/src/bytes/iter_test.go
new file mode 100644 (file)
index 0000000..e37fdfb
--- /dev/null
@@ -0,0 +1,56 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bytes_test
+
+import (
+       . "bytes"
+       "testing"
+)
+
+func BenchmarkSplitSeqEmptySeparator(b *testing.B) {
+       for range b.N {
+               for range SplitSeq(benchInputHard, nil) {
+               }
+       }
+}
+
+func BenchmarkSplitSeqSingleByteSeparator(b *testing.B) {
+       sep := []byte("/")
+       for range b.N {
+               for range SplitSeq(benchInputHard, sep) {
+               }
+       }
+}
+
+func BenchmarkSplitSeqMultiByteSeparator(b *testing.B) {
+       sep := []byte("hello")
+       for range b.N {
+               for range SplitSeq(benchInputHard, sep) {
+               }
+       }
+}
+
+func BenchmarkSplitAfterSeqEmptySeparator(b *testing.B) {
+       for range b.N {
+               for range SplitAfterSeq(benchInputHard, nil) {
+               }
+       }
+}
+
+func BenchmarkSplitAfterSeqSingleByteSeparator(b *testing.B) {
+       sep := []byte("/")
+       for range b.N {
+               for range SplitAfterSeq(benchInputHard, sep) {
+               }
+       }
+}
+
+func BenchmarkSplitAfterSeqMultiByteSeparator(b *testing.B) {
+       sep := []byte("hello")
+       for range b.N {
+               for range SplitAfterSeq(benchInputHard, sep) {
+               }
+       }
+}
diff --git a/src/strings/iter.go b/src/strings/iter.go
index 3fd2c9da978263c06db0803f3adc51f656cbd200..a42e78ee09fce1cdc2cad272fad70fccc3e8e538 100644 (file)
@@ -32,25 +32,24 @@ func Lines(s string) iter.Seq[string] {
 }
 
 // explodeSeq returns an iterator over the runes in s.
-func explodeSeq(s string) iter.Seq[string] {
-       return func(yield func(string) bool) {
-               for len(s) > 0 {
-                       _, size := utf8.DecodeRuneInString(s)
-                       if !yield(s[:size]) {
-                               return
-                       }
-                       s = s[size:]
+func explodeSeq(s string, yield func(string) bool) {
+       for len(s) > 0 {
+               _, size := utf8.DecodeRuneInString(s)
+               if !yield(s[:size]) {
+                       return
                }
+               s = s[size:]
        }
 }
 
 // splitSeq is SplitSeq or SplitAfterSeq, configured by how many
 // bytes of sep to include in the results (none or all).
 func splitSeq(s, sep string, sepSave int) iter.Seq[string] {
-       if len(sep) == 0 {
-               return explodeSeq(s)
-       }
        return func(yield func(string) bool) {
+               if len(sep) == 0 {
+                       explodeSeq(s, yield)
+                       return
+               }
                for {
                        i := Index(s, sep)
                        if i < 0 {
diff --git a/src/strings/iter_test.go b/src/strings/iter_test.go
new file mode 100644 (file)
index 0000000..2db5993
--- /dev/null
@@ -0,0 +1,52 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings_test
+
+import (
+       . "strings"
+       "testing"
+)
+
+func BenchmarkSplitSeqEmptySeparator(b *testing.B) {
+       for range b.N {
+               for range SplitSeq(benchInputHard, "") {
+               }
+       }
+}
+
+func BenchmarkSplitSeqSingleByteSeparator(b *testing.B) {
+       for range b.N {
+               for range SplitSeq(benchInputHard, "/") {
+               }
+       }
+}
+
+func BenchmarkSplitSeqMultiByteSeparator(b *testing.B) {
+       for range b.N {
+               for range SplitSeq(benchInputHard, "hello") {
+               }
+       }
+}
+
+func BenchmarkSplitAfterSeqEmptySeparator(b *testing.B) {
+       for range b.N {
+               for range SplitAfterSeq(benchInputHard, "") {
+               }
+       }
+}
+
+func BenchmarkSplitAfterSeqSingleByteSeparator(b *testing.B) {
+       for range b.N {
+               for range SplitAfterSeq(benchInputHard, "/") {
+               }
+       }
+}
+
+func BenchmarkSplitAfterSeqMultiByteSeparator(b *testing.B) {
+       for range b.N {
+               for range SplitAfterSeq(benchInputHard, "hello") {
+               }
+       }
+}