]> Cypherpunks repositories - gostls13.git/commitdiff
strings: use Builder in Map
authorMichael Fraenkel <michael.fraenkel@gmail.com>
Tue, 10 Jul 2018 02:37:59 +0000 (22:37 -0400)
committerBrad Fitzpatrick <bradfitz@golang.org>
Wed, 22 Aug 2018 00:53:54 +0000 (00:53 +0000)
Use a builder to avoid the copy when converting the []byte to a string.

name                  old time/op    new time/op    delta
ByteByteMap-8            796ns ± 5%     700ns ± 1%  -12.00%  (p=0.000 n=9+8)
Map/identity/ASCII-8     123ns ± 8%     126ns ± 7%     ~     (p=0.194 n=10+10)
Map/identity/Greek-8     198ns ± 2%     204ns ± 5%   +2.99%  (p=0.008 n=9+10)
Map/change/ASCII-8       266ns ±10%     202ns ± 3%  -24.19%  (p=0.000 n=10+10)
Map/change/Greek-8       450ns ± 4%     406ns ± 1%   -9.73%  (p=0.000 n=9+10)
MapNoChanges-8          85.4ns ± 3%    90.2ns ±11%   +5.67%  (p=0.000 n=9+10)

name                  old alloc/op   new alloc/op   delta
ByteByteMap-8             416B ± 0%      208B ± 0%  -50.00%  (p=0.000 n=10+10)
Map/identity/ASCII-8     0.00B          0.00B          ~     (all equal)
Map/identity/Greek-8     0.00B          0.00B          ~     (all equal)
Map/change/ASCII-8        128B ± 0%       64B ± 0%  -50.00%  (p=0.000 n=10+10)
Map/change/Greek-8        160B ± 0%       80B ± 0%  -50.00%  (p=0.000 n=10+10)
MapNoChanges-8           0.00B          0.00B          ~     (all equal)

name                  old allocs/op  new allocs/op  delta
ByteByteMap-8             2.00 ± 0%      1.00 ± 0%  -50.00%  (p=0.000 n=10+10)
Map/identity/ASCII-8      0.00           0.00          ~     (all equal)
Map/identity/Greek-8      0.00           0.00          ~     (all equal)
Map/change/ASCII-8        2.00 ± 0%      1.00 ± 0%  -50.00%  (p=0.000 n=10+10)
Map/change/Greek-8        2.00 ± 0%      1.00 ± 0%  -50.00%  (p=0.000 n=10+10)
MapNoChanges-8            0.00           0.00          ~     (all equal)

Fixes #26304

Change-Id: Ideec9dfc29b0b8107f34fc634247081d0031777d
Reviewed-on: https://go-review.googlesource.com/122875
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
src/strings/strings.go
src/strings/strings_test.go

index 9e7d4f0455982be9dec70681a896ad847d1773f3..e54f0c2bfa7623ccfdba65da09feeb0222b4b857 100644 (file)
@@ -466,9 +466,7 @@ func Map(mapping func(rune) rune, s string) string {
 
        // The output buffer b is initialized on demand, the first
        // time a character differs.
-       var b []byte
-       // nbytes is the number of bytes encoded in b.
-       var nbytes int
+       var b Builder
 
        for i, c := range s {
                r := mapping(c)
@@ -476,15 +474,10 @@ func Map(mapping func(rune) rune, s string) string {
                        continue
                }
 
-               b = make([]byte, len(s)+utf8.UTFMax)
-               nbytes = copy(b, s[:i])
+               b.Grow(len(s) + utf8.UTFMax)
+               b.WriteString(s[:i])
                if r >= 0 {
-                       if r < utf8.RuneSelf {
-                               b[nbytes] = byte(r)
-                               nbytes++
-                       } else {
-                               nbytes += utf8.EncodeRune(b[nbytes:], r)
-                       }
+                       b.WriteRune(r)
                }
 
                if c == utf8.RuneError {
@@ -501,33 +494,28 @@ func Map(mapping func(rune) rune, s string) string {
                break
        }
 
-       if b == nil {
+       // Fast path for unchanged input
+       if b.Cap() == 0 { // didn't call b.Grow above
                return s
        }
 
        for _, c := range s {
                r := mapping(c)
 
-               // common case
-               if (0 <= r && r < utf8.RuneSelf) && nbytes < len(b) {
-                       b[nbytes] = byte(r)
-                       nbytes++
-                       continue
-               }
-
-               // b is not big enough or r is not a ASCII rune.
                if r >= 0 {
-                       if nbytes+utf8.UTFMax >= len(b) {
-                               // Grow the buffer.
-                               nb := make([]byte, 2*len(b))
-                               copy(nb, b[:nbytes])
-                               b = nb
+                       // common case
+                       // Due to inlining, it is more performant to determine if WriteByte should be
+                       // invoked rather than always call WriteRune
+                       if r < utf8.RuneSelf {
+                               b.WriteByte(byte(r))
+                       } else {
+                               // r is not a ASCII rune.
+                               b.WriteRune(r)
                        }
-                       nbytes += utf8.EncodeRune(b[nbytes:], r)
                }
        }
 
-       return string(b[:nbytes])
+       return b.String()
 }
 
 // Repeat returns a new string consisting of count copies of the string s.
index 78bc573e5f0bf18064d521c6c0bfadaf48033643..bb46e136f26bcf7ae5a81ee6d848076e294d05eb 100644 (file)
@@ -673,6 +673,19 @@ func TestMap(t *testing.T) {
        if m != s {
                t.Errorf("encoding not handled correctly: expected %q got %q", s, m)
        }
+
+       // 9. Check mapping occurs in the front, middle and back
+       trimSpaces := func(r rune) rune {
+               if unicode.IsSpace(r) {
+                       return -1
+               }
+               return r
+       }
+       m = Map(trimSpaces, "   abc    123   ")
+       expect = "abc123"
+       if m != expect {
+               t.Errorf("trimSpaces: expected %q got %q", expect, m)
+       }
 }
 
 func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }