From 1d31f9b1e05d3766ed2132b5856d364d00c5fdf9 Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Tue, 28 Apr 2020 16:54:44 -0700 Subject: [PATCH] strconv: implement parseFloatPrefix returning no. of bytes consumed MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit parseFloatPrefix will make it easier to implement ParseComplex. Verified that there's no relevant performance impact: Benchmarks run on a "quiet" MacBook Pro, 3.3GHz Dual-Core Intel Core i7, with 16GB 2133MHz LPDDR3 RAM running macOS 10.15.4. name old time/op new time/op delta Atof64Decimal-4 38.2ns ± 4% 38.4ns ± 3% ~ (p=0.802 n=5+5) Atof64Float-4 41.1ns ± 3% 43.0ns ± 1% +4.77% (p=0.008 n=5+5) Atof64FloatExp-4 71.9ns ± 3% 70.1ns ± 1% ~ (p=0.063 n=5+5) Atof64Big-4 124ns ± 5% 119ns ± 0% ~ (p=0.143 n=5+4) Atof64RandomBits-4 57.2ns ± 1% 55.7ns ± 2% -2.66% (p=0.016 n=4+5) Atof64RandomFloats-4 56.8ns ± 1% 56.9ns ± 4% ~ (p=0.556 n=4+5) Atof32Decimal-4 35.4ns ± 5% 35.9ns ± 0% ~ (p=0.127 n=5+5) Atof32Float-4 39.6ns ± 7% 40.3ns ± 1% ~ (p=0.135 n=5+5) Atof32FloatExp-4 73.7ns ± 7% 71.9ns ± 0% ~ (p=0.175 n=5+4) Atof32Random-4 103ns ± 6% 98ns ± 2% -5.03% (p=0.008 n=5+5) Updates #36771. Change-Id: I8ff66b582ae8b468d89c9ffc35c569c735cf0341 Reviewed-on: https://go-review.googlesource.com/c/go/+/230737 Reviewed-by: Ian Lance Taylor --- src/strconv/atof.go | 146 +++++++++++++++++++---------------- src/strconv/atof_test.go | 21 +++++ src/strconv/internal_test.go | 4 + 3 files changed, 104 insertions(+), 67 deletions(-) diff --git a/src/strconv/atof.go b/src/strconv/atof.go index 23de70b1c9..c1e9907e09 100644 --- a/src/strconv/atof.go +++ b/src/strconv/atof.go @@ -14,51 +14,57 @@ import "math" var optimize = true // set to false to force slow-path conversions for testing -func equalIgnoreCase(s1, s2 string) bool { - if len(s1) != len(s2) { - return false - } - for i := 0; i < len(s1); i++ { - c1 := s1[i] - if 'A' <= c1 && c1 <= 'Z' { - c1 += 'a' - 'A' +// commonPrefixLenIgnoreCase returns the length of the common +// prefix of s and prefix, with the character case of s ignored. +// The prefix argument must be all lower-case. +func commonPrefixLenIgnoreCase(s, prefix string) int { + n := len(prefix) + if n > len(s) { + n = len(s) + } + for i := 0; i < n; i++ { + c := s[i] + if 'A' <= c && c <= 'Z' { + c += 'a' - 'A' } - c2 := s2[i] - if 'A' <= c2 && c2 <= 'Z' { - c2 += 'a' - 'A' - } - if c1 != c2 { - return false + if c != prefix[i] { + return i } } - return true + return n } -func special(s string) (f float64, ok bool) { +// special returns the floating-point value for the special, +// possibly signed floating-point representations inf, infinity, +// and NaN. The result is ok if a prefix of s contains one +// of these representations and n is the length of that prefix. +// The character case is ignored. +func special(s string) (f float64, n int, ok bool) { if len(s) == 0 { - return + return 0, 0, false } + sign := 1 + nsign := 0 switch s[0] { - default: - return - case '+': - if equalIgnoreCase(s, "+inf") || equalIgnoreCase(s, "+infinity") { - return math.Inf(1), true + case '+', '-': + if s[0] == '-' { + sign = -1 } - case '-': - if equalIgnoreCase(s, "-inf") || equalIgnoreCase(s, "-infinity") { - return math.Inf(-1), true + nsign = 1 + s = s[1:] + fallthrough + case 'i', 'I': + n := commonPrefixLenIgnoreCase(s, "infinity") + // both "inf" and "infinity" are ok + if n == 3 || n == 8 { + return math.Inf(sign), nsign + n, true } case 'n', 'N': - if equalIgnoreCase(s, "nan") { - return math.NaN(), true - } - case 'i', 'I': - if equalIgnoreCase(s, "inf") || equalIgnoreCase(s, "infinity") { - return math.Inf(1), true + if commonPrefixLenIgnoreCase(s, "nan") == 3 { + return math.NaN(), 3, true } } - return + return 0, 0, false } func (b *decimal) set(s string) (ok bool) { @@ -158,11 +164,11 @@ func (b *decimal) set(s string) (ok bool) { return } -// readFloat reads a decimal mantissa and exponent from a float -// string representation. It returns ok==false if the number -// is invalid. -func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex, ok bool) { - i := 0 +// readFloat reads a decimal or hexadecimal mantissa and exponent from a float +// string representation in s; the number may be followed by other characters. +// readFloat reports the number of bytes consumed (i), and whether the number +// is valid (ok). +func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex bool, i int, ok bool) { underscores := false // optional sign @@ -193,6 +199,7 @@ func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex, ok bool) { nd := 0 ndMant := 0 dp := 0 +loop: for ; i < len(s); i++ { switch c := s[i]; true { case c == '_': @@ -201,7 +208,7 @@ func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex, ok bool) { case c == '.': if sawdot { - return + break loop } sawdot = true dp = nd @@ -285,10 +292,6 @@ func readFloat(s string) (mantissa uint64, exp int, neg, trunc, hex, ok bool) { return } - if i != len(s) { - return - } - if mantissa != 0 { exp = dp - ndMant } @@ -554,26 +557,26 @@ func atofHex(s string, flt *floatInfo, mantissa uint64, exp int, neg, trunc bool const fnParseFloat = "ParseFloat" -func atof32(s string) (f float32, err error) { - if val, ok := special(s); ok { - return float32(val), nil +func atof32(s string) (f float32, n int, err error) { + if val, n, ok := special(s); ok { + return float32(val), n, nil } - mantissa, exp, neg, trunc, hex, ok := readFloat(s) + mantissa, exp, neg, trunc, hex, n, ok := readFloat(s) if !ok { - return 0, syntaxError(fnParseFloat, s) + return 0, n, syntaxError(fnParseFloat, s) } if hex { - f, err := atofHex(s, &float32info, mantissa, exp, neg, trunc) - return float32(f), err + f, err := atofHex(s[:n], &float32info, mantissa, exp, neg, trunc) + return float32(f), n, err } if optimize { // Try pure floating-point arithmetic conversion. if !trunc { if f, ok := atof32exact(mantissa, exp, neg); ok { - return f, nil + return f, n, nil } } // Try another fast path. @@ -584,42 +587,43 @@ func atof32(s string) (f float32, err error) { if ovf { err = rangeError(fnParseFloat, s) } - return f, err + return f, n, err } } // Slow fallback. var d decimal - if !d.set(s) { - return 0, syntaxError(fnParseFloat, s) + if !d.set(s[:n]) { + return 0, n, syntaxError(fnParseFloat, s) } b, ovf := d.floatBits(&float32info) f = math.Float32frombits(uint32(b)) if ovf { err = rangeError(fnParseFloat, s) } - return f, err + return f, n, err } -func atof64(s string) (f float64, err error) { - if val, ok := special(s); ok { - return val, nil +func atof64(s string) (f float64, n int, err error) { + if val, n, ok := special(s); ok { + return val, n, nil } - mantissa, exp, neg, trunc, hex, ok := readFloat(s) + mantissa, exp, neg, trunc, hex, n, ok := readFloat(s) if !ok { - return 0, syntaxError(fnParseFloat, s) + return 0, n, syntaxError(fnParseFloat, s) } if hex { - return atofHex(s, &float64info, mantissa, exp, neg, trunc) + f, err := atofHex(s[:n], &float64info, mantissa, exp, neg, trunc) + return f, n, err } if optimize { // Try pure floating-point arithmetic conversion. if !trunc { if f, ok := atof64exact(mantissa, exp, neg); ok { - return f, nil + return f, n, nil } } // Try another fast path. @@ -630,21 +634,21 @@ func atof64(s string) (f float64, err error) { if ovf { err = rangeError(fnParseFloat, s) } - return f, err + return f, n, err } } // Slow fallback. var d decimal - if !d.set(s) { - return 0, syntaxError(fnParseFloat, s) + if !d.set(s[:n]) { + return 0, n, syntaxError(fnParseFloat, s) } b, ovf := d.floatBits(&float64info) f = math.Float64frombits(b) if ovf { err = rangeError(fnParseFloat, s) } - return f, err + return f, n, err } // ParseFloat converts the string s to a floating-point number @@ -672,9 +676,17 @@ func atof64(s string) (f float64, err error) { // ParseFloat recognizes the strings "NaN", "+Inf", and "-Inf" as their // respective special floating point values. It ignores case when matching. func ParseFloat(s string, bitSize int) (float64, error) { + f, n, err := parseFloatPrefix(s, bitSize) + if err == nil && n != len(s) { + return 0, syntaxError(fnParseFloat, s) + } + return f, err +} + +func parseFloatPrefix(s string, bitSize int) (float64, int, error) { if bitSize == 32 { - f, err := atof32(s) - return float64(f), err + f, n, err := atof32(s) + return float64(f), n, err } return atof64(s) } diff --git a/src/strconv/atof_test.go b/src/strconv/atof_test.go index abe6c64466..8201e75af6 100644 --- a/src/strconv/atof_test.go +++ b/src/strconv/atof_test.go @@ -479,6 +479,27 @@ func initAtofOnce() { } } +func TestParseFloatPrefix(t *testing.T) { + for i := 0; i < len(atoftests); i++ { + test := &atoftests[i] + if test.err != nil { + continue + } + // Adding characters that do not extend a number should not invalidate it. + // Test a few. + for _, suffix := range []string{" ", "q", "+", "-", "<", "=", ">", "(", ")"} { + in := test.in + suffix + _, n, err := ParseFloatPrefix(in, 64) + if err != nil { + t.Errorf("ParseFloatPrefix(%q, 64): err = %v; want no error", in, err) + } + if n != len(test.in) { + t.Errorf("ParseFloatPrefix(%q, 64): n = %d; want %d", in, n, len(test.in)) + } + } + } +} + func testAtof(t *testing.T, opt bool) { initAtof() oldopt := SetOptimize(opt) diff --git a/src/strconv/internal_test.go b/src/strconv/internal_test.go index d0fa80edfb..bb4a418b30 100644 --- a/src/strconv/internal_test.go +++ b/src/strconv/internal_test.go @@ -17,3 +17,7 @@ func SetOptimize(b bool) bool { optimize = b return old } + +func ParseFloatPrefix(s string, bitSize int) (float64, int, error) { + return parseFloatPrefix(s, bitSize) +} -- 2.48.1