From: Ilya Tocar Date: Mon, 26 Feb 2018 20:00:57 +0000 (-0600) Subject: strings: speed-up replace for byteStringReplacer case X-Git-Tag: go1.11beta1~1009 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=fd22542eaa02f73bdd1e74b5116165813fc3458b;p=gostls13.git strings: speed-up replace for byteStringReplacer case Use Count instead of loop to determine a number of replacements. Also increment index instead of advancing slices, to avoid some extra stores. Shows very significant speed-up on html benchmarks: Escape-6 34.2µs ± 2% 20.8µs ± 2% -39.06% (p=0.000 n=10+10) EscapeNone-6 7.04µs ± 1% 1.05µs ± 0% -85.03% (p=0.000 n=10+10) On benchmarks in package strings results are still significant: ByteStringMatch-6 1.59µs ± 2% 1.17µs ± 2% -26.35% (p=0.000 n=10+10) HTMLEscapeNew-6 390ns ± 2% 337ns ± 2% -13.62% (p=0.000 n=10+10) HTMLEscapeOld-6 621ns ± 2% 603ns ± 2% -2.95% (p=0.000 n=10+9) Change-Id: Ibea3235b6e451ba72cd5db57716d17b917e72944 Reviewed-on: https://go-review.googlesource.com/97255 Run-TryBot: Ilya Tocar TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- diff --git a/src/strings/replace.go b/src/strings/replace.go index 4752641be0..bb0524427b 100644 --- a/src/strings/replace.go +++ b/src/strings/replace.go @@ -54,13 +54,21 @@ func NewReplacer(oldnew ...string) *Replacer { return &Replacer{r: &r} } - r := byteStringReplacer{} + r := byteStringReplacer{toReplace: make([]string, 0, len(oldnew)/2)} // The first occurrence of old->new map takes precedence // over the others with the same old string. for i := len(oldnew) - 2; i >= 0; i -= 2 { o := oldnew[i][0] n := oldnew[i+1] - r[o] = []byte(n) + // To avoid counting repetitions multiple times. + if r.replacements[o] == nil { + // We need to use string([]byte{o}) instead of string(o), + // to avoid utf8 encoding of o. + // E. g. byte(150) produces string of length 2. + r.toReplace = append(r.toReplace, string([]byte{o})) + } + r.replacements[o] = []byte(n) + } return &Replacer{r: &r} } @@ -454,34 +462,60 @@ func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) { // byteStringReplacer is the implementation that's used when all the // "old" values are single ASCII bytes but the "new" values vary in size. -// The array contains replacement byte slices indexed by old byte. -// A nil []byte means that the old byte should not be replaced. -type byteStringReplacer [256][]byte +type byteStringReplacer struct { + // replacements contains replacement byte slices indexed by old byte. + // A nil []byte means that the old byte should not be replaced. + replacements [256][]byte + // toReplace keeps a list of bytes to replace. Depending on length of toReplace + // and length of target string it may be faster to use Count, or a plain loop. + // We store single byte as a string, because Count takes a string. + toReplace []string +} + +// countCutOff controls the ratio of a string length to a number of replacements +// at which (*byteStringReplacer).Replace switches algorithms. +// For strings with higher ration of length to replacements than that value, +// we call Count, for each replacement from toReplace. +// For strings, with a lower ratio we use simple loop, because of Count overhead. +// countCutOff is an empirically determined overhead multiplier. +// TODO(tocarip) revisit once we have register-based abi/mid-stack inlining. +const countCutOff = 8 func (r *byteStringReplacer) Replace(s string) string { newSize := len(s) anyChanges := false - for i := 0; i < len(s); i++ { - b := s[i] - if r[b] != nil { - anyChanges = true - // The -1 is because we are replacing 1 byte with len(r[b]) bytes. - newSize += len(r[b]) - 1 + // Is it faster to use Count? + if len(r.toReplace)*countCutOff <= len(s) { + for _, x := range r.toReplace { + if c := Count(s, x); c != 0 { + // The -1 is because we are replacing 1 byte with len(replacements[b]) bytes. + newSize += c * (len(r.replacements[x[0]]) - 1) + anyChanges = true + } + + } + } else { + for i := 0; i < len(s); i++ { + b := s[i] + if r.replacements[b] != nil { + // See above for explanation of -1 + newSize += len(r.replacements[b]) - 1 + anyChanges = true + } } } if !anyChanges { return s } buf := make([]byte, newSize) - bi := buf + j := 0 for i := 0; i < len(s); i++ { b := s[i] - if r[b] != nil { - n := copy(bi, r[b]) - bi = bi[n:] + if r.replacements[b] != nil { + j += copy(buf[j:], r.replacements[b]) } else { - bi[0] = b - bi = bi[1:] + buf[j] = b + j++ } } return string(buf) @@ -492,7 +526,7 @@ func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err erro last := 0 for i := 0; i < len(s); i++ { b := s[i] - if r[b] == nil { + if r.replacements[b] == nil { continue } if last != i { @@ -503,7 +537,7 @@ func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err erro } } last = i + 1 - nw, err := w.Write(r[b]) + nw, err := w.Write(r.replacements[b]) n += nw if err != nil { return n, err