return &Replacer{r: &r}
}
- r := byteStringReplacer{}
+ r := byteStringReplacer{toReplace: make([]string, 0, len(oldnew)/2)}
// The first occurrence of old->new map takes precedence
// over the others with the same old string.
for i := len(oldnew) - 2; i >= 0; i -= 2 {
o := oldnew[i][0]
n := oldnew[i+1]
- r[o] = []byte(n)
+ // To avoid counting repetitions multiple times.
+ if r.replacements[o] == nil {
+ // We need to use string([]byte{o}) instead of string(o),
+ // to avoid utf8 encoding of o.
+ // E. g. byte(150) produces string of length 2.
+ r.toReplace = append(r.toReplace, string([]byte{o}))
+ }
+ r.replacements[o] = []byte(n)
+
}
return &Replacer{r: &r}
}
// byteStringReplacer is the implementation that's used when all the
// "old" values are single ASCII bytes but the "new" values vary in size.
-// The array contains replacement byte slices indexed by old byte.
-// A nil []byte means that the old byte should not be replaced.
-type byteStringReplacer [256][]byte
+type byteStringReplacer struct {
+ // replacements contains replacement byte slices indexed by old byte.
+ // A nil []byte means that the old byte should not be replaced.
+ replacements [256][]byte
+ // toReplace keeps a list of bytes to replace. Depending on length of toReplace
+ // and length of target string it may be faster to use Count, or a plain loop.
+ // We store single byte as a string, because Count takes a string.
+ toReplace []string
+}
+
+// countCutOff controls the ratio of a string length to a number of replacements
+// at which (*byteStringReplacer).Replace switches algorithms.
+// For strings with higher ration of length to replacements than that value,
+// we call Count, for each replacement from toReplace.
+// For strings, with a lower ratio we use simple loop, because of Count overhead.
+// countCutOff is an empirically determined overhead multiplier.
+// TODO(tocarip) revisit once we have register-based abi/mid-stack inlining.
+const countCutOff = 8
func (r *byteStringReplacer) Replace(s string) string {
newSize := len(s)
anyChanges := false
- for i := 0; i < len(s); i++ {
- b := s[i]
- if r[b] != nil {
- anyChanges = true
- // The -1 is because we are replacing 1 byte with len(r[b]) bytes.
- newSize += len(r[b]) - 1
+ // Is it faster to use Count?
+ if len(r.toReplace)*countCutOff <= len(s) {
+ for _, x := range r.toReplace {
+ if c := Count(s, x); c != 0 {
+ // The -1 is because we are replacing 1 byte with len(replacements[b]) bytes.
+ newSize += c * (len(r.replacements[x[0]]) - 1)
+ anyChanges = true
+ }
+
+ }
+ } else {
+ for i := 0; i < len(s); i++ {
+ b := s[i]
+ if r.replacements[b] != nil {
+ // See above for explanation of -1
+ newSize += len(r.replacements[b]) - 1
+ anyChanges = true
+ }
}
}
if !anyChanges {
return s
}
buf := make([]byte, newSize)
- bi := buf
+ j := 0
for i := 0; i < len(s); i++ {
b := s[i]
- if r[b] != nil {
- n := copy(bi, r[b])
- bi = bi[n:]
+ if r.replacements[b] != nil {
+ j += copy(buf[j:], r.replacements[b])
} else {
- bi[0] = b
- bi = bi[1:]
+ buf[j] = b
+ j++
}
}
return string(buf)
last := 0
for i := 0; i < len(s); i++ {
b := s[i]
- if r[b] == nil {
+ if r.replacements[b] == nil {
continue
}
if last != i {
}
}
last = i + 1
- nw, err := w.Write(r[b])
+ nw, err := w.Write(r.replacements[b])
n += nw
if err != nil {
return n, err