]> Cypherpunks repositories - gostls13.git/commitdiff
strings: speed up Map
authorMartin Möhrmann <moehrmann@google.com>
Mon, 14 Nov 2016 07:05:45 +0000 (08:05 +0100)
committerMartin Möhrmann <moehrmann@google.com>
Fri, 24 Feb 2017 22:53:05 +0000 (22:53 +0000)
name                  old time/op  new time/op  delta
ByteByteMap-4         2.03µs ± 2%  1.03µs ± 2%  -49.24%  (p=0.000 n=10+10)
Map/identity/ASCII-4   246ns ± 0%   158ns ± 0%  -35.90%    (p=0.000 n=9+9)
Map/identity/Greek-4   367ns ± 1%   273ns ± 1%  -25.63%  (p=0.000 n=10+10)
Map/change/ASCII-4     582ns ± 1%   324ns ± 1%  -44.34%  (p=0.000 n=10+10)
Map/change/Greek-4     709ns ± 2%   623ns ± 2%  -12.16%  (p=0.000 n=10+10)
MapNoChanges-4         171ns ± 1%   111ns ± 1%  -35.36%   (p=0.000 n=8+10)

Updates #17859

Change-Id: I55d7d261fdc1ce2dcd0ebe23b0fa20b9889bf54c
Reviewed-on: https://go-review.googlesource.com/33201
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/strings/replace_test.go
src/strings/strings.go

index 77e48b988bcefc4638a4828767cd8fd2d623526d..34b5badfadf5dc3388476aaf351fa51b55b22759 100644 (file)
@@ -540,3 +540,44 @@ func BenchmarkByteByteMap(b *testing.B) {
                Map(fn, str)
        }
 }
+
+var mapdata = []struct{ name, data string }{
+       {"ASCII", "a b c d e f g h i j k l m n o p q r s t u v w x y z"},
+       {"Greek", "α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ ς σ τ υ φ χ ψ ω"},
+}
+
+func BenchmarkMap(b *testing.B) {
+       mapidentity := func(r rune) rune {
+               return r
+       }
+
+       b.Run("identity", func(b *testing.B) {
+               for _, md := range mapdata {
+                       b.Run(md.name, func(b *testing.B) {
+                               for i := 0; i < b.N; i++ {
+                                       Map(mapidentity, md.data)
+                               }
+                       })
+               }
+       })
+
+       mapchange := func(r rune) rune {
+               if 'a' <= r && r <= 'z' {
+                       return r + 'A' - 'a'
+               }
+               if 'α' <= r && r <= 'ω' {
+                       return r + 'Α' - 'α'
+               }
+               return r
+       }
+
+       b.Run("change", func(b *testing.B) {
+               for _, md := range mapdata {
+                       b.Run(md.name, func(b *testing.B) {
+                               for i := 0; i < b.N; i++ {
+                                       Map(mapchange, md.data)
+                               }
+                       })
+               }
+       })
+}
index 5bc60e8a85a3f07e16b50a8b9b41f55c47687d7f..188d8cbc09345e14bce76a7c5824dfce880cc559 100644 (file)
@@ -383,40 +383,61 @@ func Map(mapping func(rune) rune, s string) string {
        // In the worst case, the string can grow when mapped, making
        // things unpleasant. But it's so rare we barge in assuming it's
        // fine. It could also shrink but that falls out naturally.
-       maxbytes := len(s) // length of b
-       nbytes := 0        // number of bytes encoded in b
+
        // The output buffer b is initialized on demand, the first
        // time a character differs.
        var b []byte
+       // nbytes is the number of bytes encoded in b.
+       var nbytes int
 
        for i, c := range s {
                r := mapping(c)
-               if b == nil {
-                       if r == c {
-                               continue
-                       }
-                       b = make([]byte, maxbytes)
-                       nbytes = copy(b, s[:i])
+               if r == c {
+                       continue
                }
+
+               b = make([]byte, len(s)+utf8.UTFMax)
+               nbytes = copy(b, s[:i])
                if r >= 0 {
-                       wid := 1
-                       if r >= utf8.RuneSelf {
-                               wid = utf8.RuneLen(r)
+                       if r <= utf8.RuneSelf {
+                               b[nbytes] = byte(r)
+                               nbytes++
+                       } else {
+                               nbytes += utf8.EncodeRune(b[nbytes:], r)
                        }
-                       if nbytes+wid > maxbytes {
-                               // Grow the buffer.
-                               maxbytes = maxbytes*2 + utf8.UTFMax
-                               nb := make([]byte, maxbytes)
-                               copy(nb, b[0:nbytes])
-                               b = nb
-                       }
-                       nbytes += utf8.EncodeRune(b[nbytes:maxbytes], r)
                }
+               i += utf8.RuneLen(c)
+               s = s[i:]
+               break
        }
+
        if b == nil {
                return s
        }
-       return string(b[0:nbytes])
+
+       for _, c := range s {
+               r := mapping(c)
+
+               // common case
+               if (0 <= r && r <= utf8.RuneSelf) && nbytes < len(b) {
+                       b[nbytes] = byte(r)
+                       nbytes++
+                       continue
+               }
+
+               // b is not big enough or r is not a ASCII rune.
+               if r >= 0 {
+                       if nbytes+utf8.UTFMax >= len(b) {
+                               // Grow the buffer.
+                               nb := make([]byte, 2*len(b))
+                               copy(nb, b[:nbytes])
+                               b = nb
+                       }
+                       nbytes += utf8.EncodeRune(b[nbytes:], r)
+               }
+       }
+
+       return string(b[:nbytes])
 }
 
 // Repeat returns a new string consisting of count copies of the string s.