]> Cypherpunks repositories - gostls13.git/commitdiff
bytes: speed up Replace
authorJulien Cretel <jub0bsinthecloud@gmail.com>
Thu, 20 Mar 2025 20:42:05 +0000 (20:42 +0000)
committerGopher Robot <gobot@golang.org>
Tue, 25 Mar 2025 20:02:53 +0000 (13:02 -0700)
This CL is to package bytes what CL 657935 was to package strings.

Add fuzz test and benchmarks for Replace.

The length of parameter old does not change. Move the corresponding length
check outside the loop. Use range-over-int loops where possible.

Some benchmark results (no changes to allocations):

goos: darwin
goarch: amd64
pkg: bytes
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                                 │     old      │                 new                 │
                                 │    sec/op    │   sec/op     vs base                │
Replace/"hello"_"l"_"L"_0-8         27.83n ± 2%   27.22n ± 1%   -2.17% (p=0.000 n=20)
Replace/"hello"_"l"_"L"_-1-8        60.64n ± 0%   57.97n ± 1%   -4.40% (p=0.000 n=20)
Replace/"hello"_"x"_"X"_-1-8        33.99n ± 0%   33.88n ± 0%        ~ (p=0.140 n=20)
Replace/""_"x"_"X"_-1-8             10.40n ± 1%   10.57n ± 0%   +1.64% (p=0.000 n=20)
Replace/"radar"_"r"_"<r>"_-1-8      62.63n ± 0%   61.39n ± 0%   -1.98% (p=0.000 n=20)
Replace/""_""_"<>"_-1-8             29.76n ± 1%   24.18n ± 1%  -18.75% (p=0.000 n=20)
Replace/"banana"_"a"_"<>"_-1-8      77.00n ± 0%   77.10n ± 1%        ~ (p=0.525 n=20)
Replace/"banana"_"a"_"<>"_1-8       44.24n ± 0%   43.57n ± 1%   -1.54% (p=0.000 n=20)
Replace/"banana"_"a"_"<>"_1000-8    78.23n ± 0%   77.16n ± 1%   -1.36% (p=0.000 n=20)
Replace/"banana"_"an"_"<>"_-1-8     72.78n ± 1%   69.97n ± 1%   -3.85% (p=0.000 n=20)
Replace/"banana"_"ana"_"<>"_-1-8    54.41n ± 0%   54.04n ± 1%   -0.67% (p=0.033 n=20)
Replace/"banana"_""_"<>"_-1-8       116.8n ± 1%   103.5n ± 1%  -11.42% (p=0.000 n=20)
Replace/"banana"_""_"<>"_10-8       117.2n ± 1%   103.6n ± 0%  -11.60% (p=0.000 n=20)
Replace/"banana"_""_"<>"_6-8       105.30n ± 0%   92.50n ± 0%  -12.16% (p=0.000 n=20)
Replace/"banana"_""_"<>"_5-8        91.81n ± 0%   79.87n ± 1%  -13.01% (p=0.000 n=20)
Replace/"banana"_""_"<>"_1-8        35.87n ± 1%   30.33n ± 1%  -15.43% (p=0.000 n=20)
Replace/"banana"_"a"_"a"_-1-8       70.84n ± 0%   68.83n ± 0%   -2.84% (p=0.000 n=20)
Replace/"banana"_"a"_"a"_1-8        44.27n ± 1%   43.47n ± 1%   -1.80% (p=0.000 n=20)
Replace/"☺☻☹"_""_"<>"_-1-8         104.25n ± 1%   93.33n ± 0%  -10.48% (p=0.000 n=20)
geomean                             56.31n        52.88n        -6.09%

Change-Id: I5daf44ccfd887da445d8c681415c32de7c2b85d1
GitHub-Last-Rev: d1caf1f0845a402a026764068a1db4dcf73e9017
GitHub-Pull-Request: golang/go#72967
Reviewed-on: https://go-review.googlesource.com/c/go/+/659515
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Auto-Submit: Jorropo <jorropo.pgm@gmail.com>
Reviewed-by: Jorropo <jorropo.pgm@gmail.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/bytes/bytes.go
src/bytes/bytes_test.go

index 4bc375df192ce441c898fa04108e6dbe744dc02e..8198415c3e193849aa9c9ca56ef990bdd9116511 100644 (file)
@@ -1192,19 +1192,22 @@ func Replace(s, old, new []byte, n int) []byte {
        t := make([]byte, len(s)+n*(len(new)-len(old)))
        w := 0
        start := 0
-       for i := 0; i < n; i++ {
-               j := start
-               if len(old) == 0 {
-                       if i > 0 {
-                               _, wid := utf8.DecodeRune(s[start:])
-                               j += wid
-                       }
-               } else {
-                       j += Index(s[start:], old)
-               }
-               w += copy(t[w:], s[start:j])
+       if len(old) > 0 {
+               for range n {
+                       j := start + Index(s[start:], old)
+                       w += copy(t[w:], s[start:j])
+                       w += copy(t[w:], new)
+                       start = j + len(old)
+               }
+       } else { // len(old) == 0
                w += copy(t[w:], new)
-               start = j + len(old)
+               for range n - 1 {
+                       _, wid := utf8.DecodeRune(s[start:])
+                       j := start + wid
+                       w += copy(t[w:], s[start:j])
+                       w += copy(t[w:], new)
+                       start = j
+               }
        }
        w += copy(t[w:], s[start:])
        return t[0:w]
index ead581718ace8fb21fa06ea0b8e14739e84b7228..14b52a80358bb7efa51402860dd85b18511c3460 100644 (file)
@@ -7,6 +7,7 @@ package bytes_test
 import (
        . "bytes"
        "fmt"
+       "internal/asan"
        "internal/testenv"
        "iter"
        "math"
@@ -1786,9 +1787,20 @@ var ReplaceTests = []ReplaceTest{
 
 func TestReplace(t *testing.T) {
        for _, tt := range ReplaceTests {
-               in := append([]byte(tt.in), "<spare>"...)
+               var (
+                       in  = []byte(tt.in)
+                       old = []byte(tt.old)
+                       new = []byte(tt.new)
+               )
+               if !asan.Enabled {
+                       allocs := testing.AllocsPerRun(10, func() { Replace(in, old, new, tt.n) })
+                       if allocs > 1 {
+                               t.Errorf("Replace(%q, %q, %q, %d) allocates %.2f objects", tt.in, tt.old, tt.new, tt.n, allocs)
+                       }
+               }
+               in = append(in, "<spare>"...)
                in = in[:len(tt.in)]
-               out := Replace(in, []byte(tt.old), []byte(tt.new), tt.n)
+               out := Replace(in, old, new, tt.n)
                if s := string(out); s != tt.out {
                        t.Errorf("Replace(%q, %q, %q, %d) = %q, want %q", tt.in, tt.old, tt.new, tt.n, s, tt.out)
                }
@@ -1796,7 +1808,7 @@ func TestReplace(t *testing.T) {
                        t.Errorf("Replace(%q, %q, %q, %d) didn't copy", tt.in, tt.old, tt.new, tt.n)
                }
                if tt.n == -1 {
-                       out := ReplaceAll(in, []byte(tt.old), []byte(tt.new))
+                       out := ReplaceAll(in, old, new)
                        if s := string(out); s != tt.out {
                                t.Errorf("ReplaceAll(%q, %q, %q) = %q, want %q", tt.in, tt.old, tt.new, s, tt.out)
                        }
@@ -1804,6 +1816,69 @@ func TestReplace(t *testing.T) {
        }
 }
 
+func FuzzReplace(f *testing.F) {
+       for _, tt := range ReplaceTests {
+               f.Add([]byte(tt.in), []byte(tt.old), []byte(tt.new), tt.n)
+       }
+       f.Fuzz(func(t *testing.T, in, old, new []byte, n int) {
+               differentImpl := func(in, old, new []byte, n int) []byte {
+                       var out Buffer
+                       if n < 0 {
+                               n = math.MaxInt
+                       }
+                       for i := 0; i < len(in); {
+                               if n == 0 {
+                                       out.Write(in[i:])
+                                       break
+                               }
+                               if HasPrefix(in[i:], old) {
+                                       out.Write(new)
+                                       i += len(old)
+                                       n--
+                                       if len(old) != 0 {
+                                               continue
+                                       }
+                                       if i == len(in) {
+                                               break
+                                       }
+                               }
+                               if len(old) == 0 {
+                                       _, length := utf8.DecodeRune(in[i:])
+                                       out.Write(in[i : i+length])
+                                       i += length
+                               } else {
+                                       out.WriteByte(in[i])
+                                       i++
+                               }
+                       }
+                       if len(old) == 0 && n != 0 {
+                               out.Write(new)
+                       }
+                       return out.Bytes()
+               }
+               if simple, replace := differentImpl(in, old, new, n), Replace(in, old, new, n); !slices.Equal(simple, replace) {
+                       t.Errorf("The two implementations do not match %q != %q for Replace(%q, %q, %q, %d)", simple, replace, in, old, new, n)
+               }
+       })
+}
+
+func BenchmarkReplace(b *testing.B) {
+       for _, tt := range ReplaceTests {
+               desc := fmt.Sprintf("%q %q %q %d", tt.in, tt.old, tt.new, tt.n)
+               var (
+                       in  = []byte(tt.in)
+                       old = []byte(tt.old)
+                       new = []byte(tt.new)
+               )
+               b.Run(desc, func(b *testing.B) {
+                       b.ReportAllocs()
+                       for b.Loop() {
+                               Replace(in, old, new, tt.n)
+                       }
+               })
+       }
+}
+
 type TitleTest struct {
        in, out string
 }