From 35b4fd9f373cbe13778eb259a19c496c9c613a1f Mon Sep 17 00:00:00 2001 From: tdakkota Date: Sat, 3 May 2025 19:45:02 +0300 Subject: [PATCH] bytes, strings: reduce Split{,After}Seq heap allocations MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This CL slightly changes flow of splitSeq to help compiler to inline the iterator closure. goos: linux goarch: amd64 pkg: strings cpu: AMD Ryzen 9 5950X 16-Core Processor │ sec/op │ sec/op vs base │ SplitSeqEmptySeparator-32 3.590m ± 0% 3.430m ± 2% -4.46% (p=0.000 n=30) SplitSeqSingleByteSeparator-32 647.0µ ± 0% 656.1µ ± 0% +1.41% (p=0.000 n=30) SplitSeqMultiByteSeparator-32 423.9µ ± 1% 384.5µ ± 0% -9.31% (p=0.000 n=30) SplitAfterSeqEmptySeparator-32 3.372m ± 4% 3.514m ± 0% +4.20% (p=0.000 n=30) SplitAfterSeqSingleByteSeparator-32 648.5µ ± 2% 537.6µ ± 0% -17.10% (p=0.000 n=30) SplitAfterSeqMultiByteSeparator-32 423.3µ ± 2% 364.4µ ± 2% -13.91% (p=0.000 n=30) geomean 984.7µ 917.3µ -6.85% │ B/op │ B/op vs base │ SplitSeqEmptySeparator-32 24.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=30) SplitSeqSingleByteSeparator-32 24.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=30) SplitSeqMultiByteSeparator-32 24.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=30) SplitAfterSeqEmptySeparator-32 24.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=30) SplitAfterSeqSingleByteSeparator-32 24.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=30) SplitAfterSeqMultiByteSeparator-32 24.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=30) geomean 24.00 ? For #73524 Change-Id: Ic83c5751a41c65030356a208e4ad1f500723e695 Reviewed-on: https://go-review.googlesource.com/c/go/+/669735 Auto-Submit: Alan Donovan LUCI-TryBot-Result: Go LUCI Reviewed-by: Alan Donovan Reviewed-by: Cherry Mui Reviewed-by: qiu laidongfeng2 <2645477756@qq.com> Commit-Queue: Alan Donovan --- src/bytes/iter.go | 21 +++++++-------- src/bytes/iter_test.go | 56 ++++++++++++++++++++++++++++++++++++++++ src/strings/iter.go | 21 +++++++-------- src/strings/iter_test.go | 52 +++++++++++++++++++++++++++++++++++++ 4 files changed, 128 insertions(+), 22 deletions(-) create mode 100644 src/bytes/iter_test.go create mode 100644 src/strings/iter_test.go diff --git a/src/bytes/iter.go b/src/bytes/iter.go index 8e9ee8b98d..e3af4d2f13 100644 --- a/src/bytes/iter.go +++ b/src/bytes/iter.go @@ -32,25 +32,24 @@ func Lines(s []byte) iter.Seq[[]byte] { } // explodeSeq returns an iterator over the runes in s. -func explodeSeq(s []byte) iter.Seq[[]byte] { - return func(yield func([]byte) bool) { - for len(s) > 0 { - _, size := utf8.DecodeRune(s) - if !yield(s[:size:size]) { - return - } - s = s[size:] +func explodeSeq(s []byte, yield func([]byte) bool) { + for len(s) > 0 { + _, size := utf8.DecodeRune(s) + if !yield(s[:size:size]) { + return } + s = s[size:] } } // splitSeq is SplitSeq or SplitAfterSeq, configured by how many // bytes of sep to include in the results (none or all). func splitSeq(s, sep []byte, sepSave int) iter.Seq[[]byte] { - if len(sep) == 0 { - return explodeSeq(s) - } return func(yield func([]byte) bool) { + if len(sep) == 0 { + explodeSeq(s, yield) + return + } for { i := Index(s, sep) if i < 0 { diff --git a/src/bytes/iter_test.go b/src/bytes/iter_test.go new file mode 100644 index 0000000000..e37fdfb96d --- /dev/null +++ b/src/bytes/iter_test.go @@ -0,0 +1,56 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bytes_test + +import ( + . "bytes" + "testing" +) + +func BenchmarkSplitSeqEmptySeparator(b *testing.B) { + for range b.N { + for range SplitSeq(benchInputHard, nil) { + } + } +} + +func BenchmarkSplitSeqSingleByteSeparator(b *testing.B) { + sep := []byte("/") + for range b.N { + for range SplitSeq(benchInputHard, sep) { + } + } +} + +func BenchmarkSplitSeqMultiByteSeparator(b *testing.B) { + sep := []byte("hello") + for range b.N { + for range SplitSeq(benchInputHard, sep) { + } + } +} + +func BenchmarkSplitAfterSeqEmptySeparator(b *testing.B) { + for range b.N { + for range SplitAfterSeq(benchInputHard, nil) { + } + } +} + +func BenchmarkSplitAfterSeqSingleByteSeparator(b *testing.B) { + sep := []byte("/") + for range b.N { + for range SplitAfterSeq(benchInputHard, sep) { + } + } +} + +func BenchmarkSplitAfterSeqMultiByteSeparator(b *testing.B) { + sep := []byte("hello") + for range b.N { + for range SplitAfterSeq(benchInputHard, sep) { + } + } +} diff --git a/src/strings/iter.go b/src/strings/iter.go index 3fd2c9da97..a42e78ee09 100644 --- a/src/strings/iter.go +++ b/src/strings/iter.go @@ -32,25 +32,24 @@ func Lines(s string) iter.Seq[string] { } // explodeSeq returns an iterator over the runes in s. -func explodeSeq(s string) iter.Seq[string] { - return func(yield func(string) bool) { - for len(s) > 0 { - _, size := utf8.DecodeRuneInString(s) - if !yield(s[:size]) { - return - } - s = s[size:] +func explodeSeq(s string, yield func(string) bool) { + for len(s) > 0 { + _, size := utf8.DecodeRuneInString(s) + if !yield(s[:size]) { + return } + s = s[size:] } } // splitSeq is SplitSeq or SplitAfterSeq, configured by how many // bytes of sep to include in the results (none or all). func splitSeq(s, sep string, sepSave int) iter.Seq[string] { - if len(sep) == 0 { - return explodeSeq(s) - } return func(yield func(string) bool) { + if len(sep) == 0 { + explodeSeq(s, yield) + return + } for { i := Index(s, sep) if i < 0 { diff --git a/src/strings/iter_test.go b/src/strings/iter_test.go new file mode 100644 index 0000000000..2db599377f --- /dev/null +++ b/src/strings/iter_test.go @@ -0,0 +1,52 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings_test + +import ( + . "strings" + "testing" +) + +func BenchmarkSplitSeqEmptySeparator(b *testing.B) { + for range b.N { + for range SplitSeq(benchInputHard, "") { + } + } +} + +func BenchmarkSplitSeqSingleByteSeparator(b *testing.B) { + for range b.N { + for range SplitSeq(benchInputHard, "/") { + } + } +} + +func BenchmarkSplitSeqMultiByteSeparator(b *testing.B) { + for range b.N { + for range SplitSeq(benchInputHard, "hello") { + } + } +} + +func BenchmarkSplitAfterSeqEmptySeparator(b *testing.B) { + for range b.N { + for range SplitAfterSeq(benchInputHard, "") { + } + } +} + +func BenchmarkSplitAfterSeqSingleByteSeparator(b *testing.B) { + for range b.N { + for range SplitAfterSeq(benchInputHard, "/") { + } + } +} + +func BenchmarkSplitAfterSeqMultiByteSeparator(b *testing.B) { + for range b.N { + for range SplitAfterSeq(benchInputHard, "hello") { + } + } +} -- 2.51.0