From 31c96fc2276b54f844bb5a83f2b4f817f956d976 Mon Sep 17 00:00:00 2001 From: Josselin Costanzi Date: Sat, 7 Jan 2017 14:12:57 +0100 Subject: [PATCH] encoding/base64: Optimize DecodeString MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Optimize DecodeString for the common case where most of the input isn't a newline or a padding character. Also add some testcases found when fuzzing this implementation against upstream. Change Decode benchmark to run with different input sizes. name old time/op new time/op delta DecodeString/2-4 71.5ns ± 4% 70.0ns ± 6% ~ (p=0.246 n=5+5) DecodeString/4-4 112ns ±25% 91ns ± 2% ~ (p=0.056 n=5+5) DecodeString/8-4 136ns ± 5% 126ns ± 5% -7.33% (p=0.016 n=5+5) DecodeString/64-4 872ns ±29% 652ns ±21% -25.23% (p=0.032 n=5+5) DecodeString/8192-4 90.9µs ±21% 61.0µs ±13% -32.87% (p=0.008 n=5+5) name old speed new speed delta DecodeString/2-4 56.0MB/s ± 4% 57.2MB/s ± 6% ~ (p=0.310 n=5+5) DecodeString/4-4 73.4MB/s ±23% 87.7MB/s ± 2% ~ (p=0.056 n=5+5) DecodeString/8-4 87.8MB/s ± 5% 94.8MB/s ± 5% +7.98% (p=0.016 n=5+5) DecodeString/64-4 103MB/s ±24% 136MB/s ±19% +32.63% (p=0.032 n=5+5) DecodeString/8192-4 122MB/s ±19% 180MB/s ±11% +47.75% (p=0.008 n=5+5) Improves #19636 Change-Id: I39667f4fb682a12b3137946d017ad999553c5780 Reviewed-on: https://go-review.googlesource.com/34950 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- src/encoding/base64/base64.go | 45 ++++++++++++++++-------------- src/encoding/base64/base64_test.go | 21 +++++++++++--- 2 files changed, 41 insertions(+), 25 deletions(-) diff --git a/src/encoding/base64/base64.go b/src/encoding/base64/base64.go index 5a384315f9..b208f9e4d8 100644 --- a/src/encoding/base64/base64.go +++ b/src/encoding/base64/base64.go @@ -273,44 +273,50 @@ func (e CorruptInputError) Error() string { // indicates if end-of-message padding or a partial quantum was encountered // and thus any additional data is an error. func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { - var inIdx int si := 0 - // skip over newlines - for si < len(src) && (src[si] == '\n' || src[si] == '\r') { - si++ - } - for si < len(src) && !end { // Decode quantum using the base64 alphabet var dbuf [4]byte dinc, dlen := 3, 4 - for j := range dbuf { + for j := 0; j < len(dbuf); j++ { if len(src) == si { - if enc.padChar != NoPadding || j < 2 { + switch { + case j == 0: + return n, false, nil + case j == 1, enc.padChar != NoPadding: return n, false, CorruptInputError(si - j) } dinc, dlen, end = j-1, j, true break } in := src[si] - inIdx = si si++ - // skip over newlines - for si < len(src) && (src[si] == '\n' || src[si] == '\r') { - si++ + + out := enc.decodeMap[in] + if out != 0xFF { + dbuf[j] = out + continue } + if in == '\n' || in == '\r' { + j-- + continue + } if rune(in) == enc.padChar { // We've reached the end and there's padding switch j { case 0, 1: // incorrect padding - return n, false, CorruptInputError(inIdx) + return n, false, CorruptInputError(si - 1) case 2: // "==" is expected, the first "=" is already consumed. + // skip over newlines + for si < len(src) && (src[si] == '\n' || src[si] == '\r') { + si++ + } if si == len(src) { // not enough padding return n, false, CorruptInputError(len(src)) @@ -321,10 +327,10 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { } si++ - // skip over newlines - for si < len(src) && (src[si] == '\n' || src[si] == '\r') { - si++ - } + } + // skip over newlines + for si < len(src) && (src[si] == '\n' || src[si] == '\r') { + si++ } if si < len(src) { // trailing garbage @@ -333,10 +339,7 @@ func (enc *Encoding) decode(dst, src []byte) (n int, end bool, err error) { dinc, dlen, end = 3, j, true break } - dbuf[j] = enc.decodeMap[in] - if dbuf[j] == 0xFF { - return n, false, CorruptInputError(inIdx) - } + return n, false, CorruptInputError(si - 1) } // Convert 4x 6bit source bytes into 3 bytes diff --git a/src/encoding/base64/base64_test.go b/src/encoding/base64/base64_test.go index ce52202dd8..8ebf2b1553 100644 --- a/src/encoding/base64/base64_test.go +++ b/src/encoding/base64/base64_test.go @@ -7,6 +7,7 @@ package base64 import ( "bytes" "errors" + "fmt" "io" "io/ioutil" "reflect" @@ -202,6 +203,9 @@ func TestDecodeCorrupt(t *testing.T) { offset int // -1 means no corruption. }{ {"", -1}, + {"\n", -1}, + {"AAA=\n", -1}, + {"AAAA\n", -1}, {"!!!!", 0}, {"====", 0}, {"x===", 1}, @@ -468,10 +472,19 @@ func BenchmarkEncodeToString(b *testing.B) { } func BenchmarkDecodeString(b *testing.B) { - data := StdEncoding.EncodeToString(make([]byte, 8192)) - b.SetBytes(int64(len(data))) - for i := 0; i < b.N; i++ { - StdEncoding.DecodeString(data) + sizes := []int{2, 4, 8, 64, 8192} + benchFunc := func(b *testing.B, benchSize int) { + data := StdEncoding.EncodeToString(make([]byte, benchSize)) + b.SetBytes(int64(len(data))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + StdEncoding.DecodeString(data) + } + } + for _, size := range sizes { + b.Run(fmt.Sprintf("%d", size), func(b *testing.B) { + benchFunc(b, size) + }) } } -- 2.48.1