From cf26fbb1f6d9644f447342f42d2dddcbe9ceda61 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 26 Aug 2021 12:02:47 -0700 Subject: [PATCH] strconv: optimize Parse for []byte arguments MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When one has a []byte on hand, but desires to call the Parse functions, the conversion from []byte to string would allocate. var b []byte = ... v, err := strconv.ParseXXX(string(b), ...) This changes it such that the input string never escapes from any of the Parse functions. Together with the compiler optimization where the compiler stack allocates any string smaller than 32B this makes most valid inputs for strconv.ParseXXX(string(b), ...) not require an allocation for the input string. For example, the longest int64 or uint64 encoded in decimal is 20B. Also, the longest decimal formatting of a float64 in appendix B of RFC 8785 is 25B. Previously, this was not possible since the input leaked to the error, which causes the prover to give up and instead heap copy the []byte. We fix this by copying the input string in the error case. The advantage of this change is that you can now call strconv.ParseXXX with a []byte without allocations (most times) in the non-error case. The detriment is that the error-case now has an extra allocation. We should optimize for the non-error path, rather than the error path. The effects of this change is transitively seen through packages that must use strconv.ParseXXX on a []byte such as "encoding/json": name old time/op new time/op delta UnmarshalFloat64 186ns 157ns -15.89% (p=0.000 n=10+10) name old alloc/op new alloc/op delta UnmarshalFloat64 148B 144B -2.70% (p=0.000 n=10+10) name old allocs/op new allocs/op delta UnmarshalFloat64 2.00 1.00 -50.00% (p=0.000 n=10+10) In order for "encoding/json" to benefit, there needs to be a small change made to how "encoding/json" calls strconv.ParseXXX. That will be a future change. Credit goes to Jeff Wendling for a similar patch. Fixes #42429 Change-Id: I512d6927f965f82e95bd7ec14a28a587f23b7203 Reviewed-on: https://go-review.googlesource.com/c/go/+/345488 Reviewed-by: Martin Möhrmann Run-TryBot: Joseph Tsai TryBot-Result: Gopher Robot Reviewed-by: Brad Fitzpatrick Auto-Submit: Joseph Tsai Reviewed-by: Robert Griesemer Reviewed-by: David Chase Reviewed-by: Ian Lance Taylor --- src/strconv/atoc.go | 2 +- src/strconv/atoi.go | 30 +++++++++++++----- src/strconv/strconv_test.go | 62 +++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 8 deletions(-) diff --git a/src/strconv/atoc.go b/src/strconv/atoc.go index 85c7bafefa..f6fdd14e64 100644 --- a/src/strconv/atoc.go +++ b/src/strconv/atoc.go @@ -11,7 +11,7 @@ const fnParseComplex = "ParseComplex" func convErr(err error, s string) (syntax, range_ error) { if x, ok := err.(*NumError); ok { x.Func = fnParseComplex - x.Num = s + x.Num = cloneString(s) if x.Err == ErrRange { return nil, x } diff --git a/src/strconv/atoi.go b/src/strconv/atoi.go index be08f93356..520d826323 100644 --- a/src/strconv/atoi.go +++ b/src/strconv/atoi.go @@ -33,20 +33,36 @@ func (e *NumError) Error() string { func (e *NumError) Unwrap() error { return e.Err } +// cloneString returns a string copy of x. +// +// All ParseXXX functions allow the input string to escape to the error value. +// This hurts strconv.ParseXXX(string(b)) calls where b is []byte since +// the conversion from []byte must allocate a string on the heap. +// If we assume errors are infrequent, then we can avoid escaping the input +// back to the output by copying it first. This allows the compiler to call +// strconv.ParseXXX without a heap allocation for most []byte to string +// conversions, since it can now prove that the string cannot escape Parse. +// +// TODO: Use strings.Clone instead? However, we cannot depend on "strings" +// since it incurs a transitive dependency on "unicode". +// Either move strings.Clone to an internal/bytealg or make the +// "strings" to "unicode" dependency lighter (see https://go.dev/issue/54098). +func cloneString(x string) string { return string([]byte(x)) } + func syntaxError(fn, str string) *NumError { - return &NumError{fn, str, ErrSyntax} + return &NumError{fn, cloneString(str), ErrSyntax} } func rangeError(fn, str string) *NumError { - return &NumError{fn, str, ErrRange} + return &NumError{fn, cloneString(str), ErrRange} } func baseError(fn, str string, base int) *NumError { - return &NumError{fn, str, errors.New("invalid base " + Itoa(base))} + return &NumError{fn, cloneString(str), errors.New("invalid base " + Itoa(base))} } func bitSizeError(fn, str string, bitSize int) *NumError { - return &NumError{fn, str, errors.New("invalid bit size " + Itoa(bitSize))} + return &NumError{fn, cloneString(str), errors.New("invalid bit size " + Itoa(bitSize))} } const intSize = 32 << (^uint(0) >> 63) @@ -205,7 +221,7 @@ func ParseInt(s string, base int, bitSize int) (i int64, err error) { un, err = ParseUint(s, base, bitSize) if err != nil && err.(*NumError).Err != ErrRange { err.(*NumError).Func = fnParseInt - err.(*NumError).Num = s0 + err.(*NumError).Num = cloneString(s0) return 0, err } @@ -239,7 +255,7 @@ func Atoi(s string) (int, error) { if s[0] == '-' || s[0] == '+' { s = s[1:] if len(s) < 1 { - return 0, &NumError{fnAtoi, s0, ErrSyntax} + return 0, syntaxError(fnAtoi, s0) } } @@ -247,7 +263,7 @@ func Atoi(s string) (int, error) { for _, ch := range []byte(s) { ch -= '0' if ch > 9 { - return 0, &NumError{fnAtoi, s0, ErrSyntax} + return 0, syntaxError(fnAtoi, s0) } n = n*10 + int(ch) } diff --git a/src/strconv/strconv_test.go b/src/strconv/strconv_test.go index d3c1e953de..41b8fa7e33 100644 --- a/src/strconv/strconv_test.go +++ b/src/strconv/strconv_test.go @@ -66,6 +66,68 @@ func TestCountMallocs(t *testing.T) { } } +// Sink makes sure the compiler cannot optimize away the benchmarks. +var Sink struct { + Bool bool + Int int + Int64 int64 + Uint64 uint64 + Float64 float64 + Complex128 complex128 + Error error + Bytes []byte +} + +func TestAllocationsFromBytes(t *testing.T) { + const runsPerTest = 100 + bytes := struct{ Bool, Number, String, Buffer []byte }{ + Bool: []byte("false"), + Number: []byte("123456789"), + String: []byte("hello, world!"), + Buffer: make([]byte, 1024), + } + + checkNoAllocs := func(f func()) func(t *testing.T) { + return func(t *testing.T) { + t.Helper() + if allocs := testing.AllocsPerRun(runsPerTest, f); allocs != 0 { + t.Errorf("got %v allocs, want 0 allocs", allocs) + } + } + } + + t.Run("Atoi", checkNoAllocs(func() { + Sink.Int, Sink.Error = Atoi(string(bytes.Number)) + })) + t.Run("ParseBool", checkNoAllocs(func() { + Sink.Bool, Sink.Error = ParseBool(string(bytes.Bool)) + })) + t.Run("ParseInt", checkNoAllocs(func() { + Sink.Int64, Sink.Error = ParseInt(string(bytes.Number), 10, 64) + })) + t.Run("ParseUint", checkNoAllocs(func() { + Sink.Uint64, Sink.Error = ParseUint(string(bytes.Number), 10, 64) + })) + t.Run("ParseFloat", checkNoAllocs(func() { + Sink.Float64, Sink.Error = ParseFloat(string(bytes.Number), 64) + })) + t.Run("ParseComplex", checkNoAllocs(func() { + Sink.Complex128, Sink.Error = ParseComplex(string(bytes.Number), 128) + })) + t.Run("CanBackquote", checkNoAllocs(func() { + Sink.Bool = CanBackquote(string(bytes.String)) + })) + t.Run("AppendQuote", checkNoAllocs(func() { + Sink.Bytes = AppendQuote(bytes.Buffer[:0], string(bytes.String)) + })) + t.Run("AppendQuoteToASCII", checkNoAllocs(func() { + Sink.Bytes = AppendQuoteToASCII(bytes.Buffer[:0], string(bytes.String)) + })) + t.Run("AppendQuoteToGraphic", checkNoAllocs(func() { + Sink.Bytes = AppendQuoteToGraphic(bytes.Buffer[:0], string(bytes.String)) + })) +} + func TestErrorPrefixes(t *testing.T) { _, errInt := Atoi("INVALID") _, errBool := ParseBool("INVALID") -- 2.50.0