From bba7396fbd4c3245dbedd3cc2a1fb25137331ebb Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Mon, 3 Oct 2011 15:19:04 -0700 Subject: [PATCH] strings: implement a faster byte->string Replacer This implements a replacer for when all old strings are single bytes, but new values are not. BenchmarkHTMLEscapeNew 1000000 1090 ns/op BenchmarkHTMLEscapeOld 1000000 2049 ns/op R=rsc CC=golang-dev https://golang.org/cl/5176043 --- src/pkg/http/header.go | 5 +- src/pkg/http/server.go | 15 +++-- src/pkg/mime/multipart/writer.go | 6 +- src/pkg/strings/replace.go | 110 ++++++++++++++++++++++++++++++- src/pkg/strings/replace_test.go | 32 ++++++++- 5 files changed, 154 insertions(+), 14 deletions(-) diff --git a/src/pkg/http/header.go b/src/pkg/http/header.go index 08b0771304..aaaa92a2ef 100644 --- a/src/pkg/http/header.go +++ b/src/pkg/http/header.go @@ -47,6 +47,8 @@ func (h Header) Write(w io.Writer) os.Error { return h.WriteSubset(w, nil) } +var headerNewlineToSpace = strings.NewReplacer("\n", " ", "\r", " ") + // WriteSubset writes a header in wire format. // If exclude is not nil, keys where exclude[key] == true are not written. func (h Header) WriteSubset(w io.Writer, exclude map[string]bool) os.Error { @@ -59,8 +61,7 @@ func (h Header) WriteSubset(w io.Writer, exclude map[string]bool) os.Error { sort.Strings(keys) for _, k := range keys { for _, v := range h[k] { - v = strings.Replace(v, "\n", " ", -1) - v = strings.Replace(v, "\r", " ", -1) + v = headerNewlineToSpace.Replace(v) v = strings.TrimSpace(v) if _, err := fmt.Fprintf(w, "%s: %s\r\n", k, v); err != nil { return err diff --git a/src/pkg/http/server.go b/src/pkg/http/server.go index 8326ff8be1..e8e23087e0 100644 --- a/src/pkg/http/server.go +++ b/src/pkg/http/server.go @@ -752,13 +752,16 @@ func Redirect(w ResponseWriter, r *Request, urlStr string, code int) { } } +var htmlReplacer = strings.NewReplacer( + "&", "&", + "<", "<", + ">", ">", + `"`, """, + "'", "'", +) + func htmlEscape(s string) string { - s = strings.Replace(s, "&", "&", -1) - s = strings.Replace(s, "<", "<", -1) - s = strings.Replace(s, ">", ">", -1) - s = strings.Replace(s, "\"", """, -1) - s = strings.Replace(s, "'", "'", -1) - return s + return htmlReplacer.Replace(s) } // Redirect to a fixed URL diff --git a/src/pkg/mime/multipart/writer.go b/src/pkg/mime/multipart/writer.go index 97a8897b29..1bff02fa2a 100644 --- a/src/pkg/mime/multipart/writer.go +++ b/src/pkg/mime/multipart/writer.go @@ -85,10 +85,10 @@ func (w *Writer) CreatePart(header textproto.MIMEHeader) (io.Writer, os.Error) { return p, nil } +var quoteEscaper = strings.NewReplacer("\\", "\\\\", `"`, "\\\"") + func escapeQuotes(s string) string { - s = strings.Replace(s, "\\", "\\\\", -1) - s = strings.Replace(s, "\"", "\\\"", -1) - return s + return quoteEscaper.Replace(s) } // CreateFormFile is a convenience wrapper around CreatePart. It creates diff --git a/src/pkg/strings/replace.go b/src/pkg/strings/replace.go index 8eab12d3c8..64a7f208b9 100644 --- a/src/pkg/strings/replace.go +++ b/src/pkg/strings/replace.go @@ -36,8 +36,12 @@ func NewReplacer(oldnew ...string) *Replacer { panic("strings.NewReplacer: odd argument count") } - var bb byteReplacer - var gen genericReplacer + // Possible implementations. + var ( + bb byteReplacer + bs byteStringReplacer + gen genericReplacer + ) allOldBytes, allNewBytes := true, true for len(oldnew) > 0 { @@ -49,7 +53,17 @@ func NewReplacer(oldnew ...string) *Replacer { if len(new) != 1 { allNewBytes = false } + + // generic gen.p = append(gen.p, pair{old, new}) + + // byte -> string + if allOldBytes { + bs.old.set(old[0]) + bs.new[old[0]] = []byte(new) + } + + // byte -> byte if allOldBytes && allNewBytes { bb.old.set(old[0]) bb.new[old[0]] = new[0] @@ -59,6 +73,9 @@ func NewReplacer(oldnew ...string) *Replacer { if allOldBytes && allNewBytes { return &Replacer{r: &bb} } + if allOldBytes { + return &Replacer{r: &bs} + } return &Replacer{r: &gen} } @@ -176,6 +193,7 @@ func (r *byteReplacer) Replace(s string) string { } func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err os.Error) { + // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation. bufsize := 32 << 10 if len(s) < bufsize { bufsize = len(s) @@ -199,6 +217,94 @@ func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err os.Error) return n, nil } +// byteStringReplacer is the implementation that's used when all the +// "old" values are single ASCII bytes but the "new" values vary in +// size. +type byteStringReplacer struct { + // old has a bit set for each old byte that should be replaced. + old byteBitmap + + // replacement string, indexed by old byte. only valid if + // corresponding old bit is set. + new [256][]byte +} + +func (r *byteStringReplacer) Replace(s string) string { + newSize := 0 + anyChanges := false + for i := 0; i < len(s); i++ { + b := s[i] + if r.old[b>>5]&uint32(1<<(b&31)) != 0 { + anyChanges = true + newSize += len(r.new[b]) + } else { + newSize++ + } + } + if !anyChanges { + return s + } + buf := make([]byte, newSize) + bi := buf + for i := 0; i < len(s); i++ { + b := s[i] + if r.old[b>>5]&uint32(1<<(b&31)) != 0 { + n := copy(bi[:], r.new[b]) + bi = bi[n:] + } else { + bi[0] = b + bi = bi[1:] + } + } + return string(buf) +} + +// WriteString maintains one buffer that's at most 32KB. The bytes in +// s are enumerated and the buffer is filled. If it reaches its +// capacity or a byte has a replacement, the buffer is flushed to w. +func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err os.Error) { + // TODO(bradfitz): use io.WriteString with slices of s instead. + bufsize := 32 << 10 + if len(s) < bufsize { + bufsize = len(s) + } + buf := make([]byte, bufsize) + bi := buf[:0] + + for i := 0; i < len(s); i++ { + b := s[i] + var new []byte + if r.old[b>>5]&uint32(1<<(b&31)) != 0 { + new = r.new[b] + } else { + bi = append(bi, b) + } + if len(bi) == cap(bi) || (len(bi) > 0 && len(new) > 0) { + nw, err := w.Write(bi) + n += nw + if err != nil { + return n, err + } + bi = buf[:0] + } + if len(new) > 0 { + nw, err := w.Write(new) + n += nw + if err != nil { + return n, err + } + } + } + if len(bi) > 0 { + nw, err := w.Write(bi) + n += nw + if err != nil { + return n, err + } + } + return n, nil +} + // strings is too low-level to import io/ioutil var discard io.Writer = devNull(0) diff --git a/src/pkg/strings/replace_test.go b/src/pkg/strings/replace_test.go index 20e734f6cd..e337856c64 100644 --- a/src/pkg/strings/replace_test.go +++ b/src/pkg/strings/replace_test.go @@ -41,14 +41,21 @@ var capitalLetters = NewReplacer("a", "A", "b", "B") var blankToXReplacer = NewReplacer("", "X", "o", "O") var ReplacerTests = []ReplacerTest{ + // byte->string {htmlEscaper, "No changes", "No changes"}, {htmlEscaper, "I <3 escaping & stuff", "I <3 escaping & stuff"}, {htmlEscaper, "&&&", "&&&"}, + + // generic {replacer, "fooaaabar", "foo3[aaa]b1[a]r"}, {replacer, "long, longerst, longer", "short, most long, medium"}, {replacer, "XiX", "YiY"}, + + // byte->byte {capitalLetters, "brad", "BrAd"}, {capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)}, + + // hitting "" special case {blankToXReplacer, "oo", "XOXOX"}, } @@ -84,7 +91,9 @@ type pickAlgorithmTest struct { var pickAlgorithmTests = []pickAlgorithmTest{ {capitalLetters, "*strings.byteReplacer"}, - {NewReplacer("a", "A", "b", "Bb"), "*strings.genericReplacer"}, + {NewReplacer("12", "123"), "*strings.genericReplacer"}, + {NewReplacer("1", "12"), "*strings.byteStringReplacer"}, + {htmlEscaper, "*strings.byteStringReplacer"}, } func TestPickAlgorithm(t *testing.T) { @@ -118,6 +127,27 @@ func BenchmarkByteByteMatch(b *testing.B) { } } +func BenchmarkByteStringMatch(b *testing.B) { + str := "<" + Repeat("a", 99) + Repeat("b", 99) + ">" + for i := 0; i < b.N; i++ { + htmlEscaper.Replace(str) + } +} + +func BenchmarkHTMLEscapeNew(b *testing.B) { + str := "I <3 to escape HTML & other text too." + for i := 0; i < b.N; i++ { + htmlEscaper.Replace(str) + } +} + +func BenchmarkHTMLEscapeOld(b *testing.B) { + str := "I <3 to escape HTML & other text too." + for i := 0; i < b.N; i++ { + oldhtmlEscape(str) + } +} + // BenchmarkByteByteReplaces compares byteByteImpl against multiple Replaces. func BenchmarkByteByteReplaces(b *testing.B) { str := Repeat("a", 100) + Repeat("b", 100) -- 2.48.1