From 9d73a6dcad11238dfd31a39a4d5a9e63bbc1d207 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Justin=20Nu=C3=9F?= Date: Tue, 26 Jan 2016 21:14:35 +0100 Subject: [PATCH] strconv: Avoid allocation in AppendQuote* The current implementations of the AppendQuote functions use quoteWith (through Quote) for quoting the given value and appends the returned string to the dst byte slice. quoteWith internally creates a byte slice on each call which gets converted to a string in Quote. This means the AppendQuote functions always allocates a new byte slice and a string only to append them to an existing byte slice. In the case of (Append)QuoteRune the string passed to quoteWith will also needs to be allocated from a rune first. Split quoteWith into two functions (quoteWith and appendQuotedWith) and replace the call to Quote inside AppendQuote with appendQuotedWith, which appends directly to the byte slice passed to AppendQuote and also avoids the []byte->string conversion. Also introduce the 2 functions quoteRuneWith and appendQuotedRuneWith that work the same way as quoteWith and appendQuotedWith, but take a single rune instead of a string, to avoid allocating a new string when appending a single rune, and use them in (Append)QuoteRune. Also update the ToASCII and ToGraphic variants to use the new functions. Benchmark results: benchmark old ns/op new ns/op delta BenchmarkQuote-8 428 503 +17.52% BenchmarkQuoteRune-8 148 105 -29.05% BenchmarkAppendQuote-8 435 307 -29.43% BenchmarkAppendQuoteRune-8 158 23.5 -85.13% benchmark old allocs new allocs delta BenchmarkQuote-8 3 3 +0.00% BenchmarkQuoteRune-8 3 2 -33.33% BenchmarkAppendQuote-8 3 0 -100.00% BenchmarkAppendQuoteRune-8 3 0 -100.00% benchmark old bytes new bytes delta BenchmarkQuote-8 144 144 +0.00% BenchmarkQuoteRune-8 16 16 +0.00% BenchmarkAppendQuote-8 144 0 -100.00% BenchmarkAppendQuoteRune-8 16 0 -100.00% Change-Id: I77c148d5c7242f1b0edbbeeea184878abb51a522 Reviewed-on: https://go-review.googlesource.com/18962 Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- src/strconv/quote.go | 146 +++++++++++++++++++++----------------- src/strconv/quote_test.go | 28 ++++++++ 2 files changed, 109 insertions(+), 65 deletions(-) diff --git a/src/strconv/quote.go b/src/strconv/quote.go index 40d0667551..a37a309f26 100644 --- a/src/strconv/quote.go +++ b/src/strconv/quote.go @@ -6,15 +6,19 @@ package strconv -import ( - "unicode/utf8" -) +import "unicode/utf8" const lowerhex = "0123456789abcdef" func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string { - var runeTmp [utf8.UTFMax]byte - buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. + return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly)) +} + +func quoteRuneWith(r rune, quote byte, ASCIIonly, graphicOnly bool) string { + return string(appendQuotedRuneWith(nil, r, quote, ASCIIonly, graphicOnly)) +} + +func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte { buf = append(buf, quote) for width := 0; len(s) > 0; s = s[width:] { r := rune(s[0]) @@ -28,61 +32,76 @@ func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string { buf = append(buf, lowerhex[s[0]&0xF]) continue } - if r == rune(quote) || r == '\\' { // always backslashed - buf = append(buf, '\\') + buf = appendEscapedRune(buf, r, width, quote, ASCIIonly, graphicOnly) + } + buf = append(buf, quote) + return buf +} + +func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte { + buf = append(buf, quote) + if !utf8.ValidRune(r) { + r = utf8.RuneError + } + buf = appendEscapedRune(buf, r, utf8.RuneLen(r), quote, ASCIIonly, graphicOnly) + buf = append(buf, quote) + return buf +} + +func appendEscapedRune(buf []byte, r rune, width int, quote byte, ASCIIonly, graphicOnly bool) []byte { + var runeTmp [utf8.UTFMax]byte + if r == rune(quote) || r == '\\' { // always backslashed + buf = append(buf, '\\') + buf = append(buf, byte(r)) + return buf + } + if ASCIIonly { + if r < utf8.RuneSelf && IsPrint(r) { buf = append(buf, byte(r)) - continue + return buf } - if ASCIIonly { - if r < utf8.RuneSelf && IsPrint(r) { - buf = append(buf, byte(r)) - continue + } else if IsPrint(r) || graphicOnly && isInGraphicList(r) { + n := utf8.EncodeRune(runeTmp[:], r) + buf = append(buf, runeTmp[:n]...) + return buf + } + switch r { + case '\a': + buf = append(buf, `\a`...) + case '\b': + buf = append(buf, `\b`...) + case '\f': + buf = append(buf, `\f`...) + case '\n': + buf = append(buf, `\n`...) + case '\r': + buf = append(buf, `\r`...) + case '\t': + buf = append(buf, `\t`...) + case '\v': + buf = append(buf, `\v`...) + default: + switch { + case r < ' ': + buf = append(buf, `\x`...) + buf = append(buf, lowerhex[byte(r)>>4]) + buf = append(buf, lowerhex[byte(r)&0xF]) + case r > utf8.MaxRune: + r = 0xFFFD + fallthrough + case r < 0x10000: + buf = append(buf, `\u`...) + for s := 12; s >= 0; s -= 4 { + buf = append(buf, lowerhex[r>>uint(s)&0xF]) } - } else if IsPrint(r) || graphicOnly && isInGraphicList(r) { - n := utf8.EncodeRune(runeTmp[:], r) - buf = append(buf, runeTmp[:n]...) - continue - } - switch r { - case '\a': - buf = append(buf, `\a`...) - case '\b': - buf = append(buf, `\b`...) - case '\f': - buf = append(buf, `\f`...) - case '\n': - buf = append(buf, `\n`...) - case '\r': - buf = append(buf, `\r`...) - case '\t': - buf = append(buf, `\t`...) - case '\v': - buf = append(buf, `\v`...) default: - switch { - case r < ' ': - buf = append(buf, `\x`...) - buf = append(buf, lowerhex[s[0]>>4]) - buf = append(buf, lowerhex[s[0]&0xF]) - case r > utf8.MaxRune: - r = 0xFFFD - fallthrough - case r < 0x10000: - buf = append(buf, `\u`...) - for s := 12; s >= 0; s -= 4 { - buf = append(buf, lowerhex[r>>uint(s)&0xF]) - } - default: - buf = append(buf, `\U`...) - for s := 28; s >= 0; s -= 4 { - buf = append(buf, lowerhex[r>>uint(s)&0xF]) - } + buf = append(buf, `\U`...) + for s := 28; s >= 0; s -= 4 { + buf = append(buf, lowerhex[r>>uint(s)&0xF]) } } } - buf = append(buf, quote) - return string(buf) - + return buf } // Quote returns a double-quoted Go string literal representing s. The @@ -96,7 +115,7 @@ func Quote(s string) string { // AppendQuote appends a double-quoted Go string literal representing s, // as generated by Quote, to dst and returns the extended buffer. func AppendQuote(dst []byte, s string) []byte { - return append(dst, Quote(s)...) + return appendQuotedWith(dst, s, '"', false, false) } // QuoteToASCII returns a double-quoted Go string literal representing s. @@ -109,7 +128,7 @@ func QuoteToASCII(s string) string { // AppendQuoteToASCII appends a double-quoted Go string literal representing s, // as generated by QuoteToASCII, to dst and returns the extended buffer. func AppendQuoteToASCII(dst []byte, s string) []byte { - return append(dst, QuoteToASCII(s)...) + return appendQuotedWith(dst, s, '"', true, false) } // QuoteToGraphic returns a double-quoted Go string literal representing s. @@ -122,21 +141,20 @@ func QuoteToGraphic(s string) string { // AppendQuoteToGraphic appends a double-quoted Go string literal representing s, // as generated by QuoteToGraphic, to dst and returns the extended buffer. func AppendQuoteToGraphic(dst []byte, s string) []byte { - return append(dst, QuoteToGraphic(s)...) + return appendQuotedWith(dst, s, '"', false, true) } // QuoteRune returns a single-quoted Go character literal representing the // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) // for control characters and non-printable characters as defined by IsPrint. func QuoteRune(r rune) string { - // TODO: avoid the allocation here. - return quoteWith(string(r), '\'', false, false) + return quoteRuneWith(r, '\'', false, false) } // AppendQuoteRune appends a single-quoted Go character literal representing the rune, // as generated by QuoteRune, to dst and returns the extended buffer. func AppendQuoteRune(dst []byte, r rune) []byte { - return append(dst, QuoteRune(r)...) + return appendQuotedRuneWith(dst, r, '\'', false, false) } // QuoteRuneToASCII returns a single-quoted Go character literal representing @@ -144,14 +162,13 @@ func AppendQuoteRune(dst []byte, r rune) []byte { // \u0100) for non-ASCII characters and non-printable characters as defined // by IsPrint. func QuoteRuneToASCII(r rune) string { - // TODO: avoid the allocation here. - return quoteWith(string(r), '\'', true, false) + return quoteRuneWith(r, '\'', true, false) } // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune, // as generated by QuoteRuneToASCII, to dst and returns the extended buffer. func AppendQuoteRuneToASCII(dst []byte, r rune) []byte { - return append(dst, QuoteRuneToASCII(r)...) + return appendQuotedRuneWith(dst, r, '\'', true, false) } // QuoteRuneToGraphic returns a single-quoted Go character literal representing @@ -159,14 +176,13 @@ func AppendQuoteRuneToASCII(dst []byte, r rune) []byte { // \u0100) for non-ASCII characters and non-printable characters as defined // by IsGraphic. func QuoteRuneToGraphic(r rune) string { - // TODO: avoid the allocation here. - return quoteWith(string(r), '\'', false, true) + return quoteRuneWith(r, '\'', false, true) } // AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune, // as generated by QuoteRuneToGraphic, to dst and returns the extended buffer. func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte { - return append(dst, QuoteRuneToGraphic(r)...) + return appendQuotedRuneWith(dst, r, '\'', false, true) } // CanBackquote reports whether the string s can be represented diff --git a/src/strconv/quote_test.go b/src/strconv/quote_test.go index 3e8ec2c98f..10735e316c 100644 --- a/src/strconv/quote_test.go +++ b/src/strconv/quote_test.go @@ -89,6 +89,34 @@ func TestQuoteToGraphic(t *testing.T) { } } +func BenchmarkQuote(b *testing.B) { + for i := 0; i < b.N; i++ { + Quote("\a\b\f\r\n\t\v\a\b\f\r\n\t\v\a\b\f\r\n\t\v") + } +} + +func BenchmarkQuoteRune(b *testing.B) { + for i := 0; i < b.N; i++ { + QuoteRune('\a') + } +} + +var benchQuoteBuf []byte + +func BenchmarkAppendQuote(b *testing.B) { + for i := 0; i < b.N; i++ { + benchQuoteBuf = AppendQuote(benchQuoteBuf[:0], "\a\b\f\r\n\t\v\a\b\f\r\n\t\v\a\b\f\r\n\t\v") + } +} + +var benchQuoteRuneBuf []byte + +func BenchmarkAppendQuoteRune(b *testing.B) { + for i := 0; i < b.N; i++ { + benchQuoteRuneBuf = AppendQuoteRune(benchQuoteRuneBuf[:0], '\a') + } +} + type quoteRuneTest struct { in rune out string -- 2.48.1