From cad480440d4d826de6384d136e3c2e0072cb34b8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?R=C3=A9my=20Oudompheng?= Date: Sat, 21 Apr 2012 13:56:51 +0200 Subject: [PATCH] strconv: 2x-4x speed improvement for atof64. benchmark old ns/op new ns/op delta BenchmarkAtof64Decimal 344 71 -79.22% BenchmarkAtof64Float 397 90 -77.15% BenchmarkAtof64FloatExp 445 241 -45.84% BenchmarkAtof64Big 731 324 -55.68% BenchmarkAtof64RandomBits 761 453 -40.47% BenchmarkAtof64RandomFloats 690 314 -54.49% R=dave, rsc CC=golang-dev, remy https://golang.org/cl/5988053 --- src/pkg/strconv/atof.go | 236 ++++++++++++++++++++++---------- src/pkg/strconv/extfloat.go | 19 ++- src/pkg/strconv/strconv_test.go | 10 ++ 3 files changed, 183 insertions(+), 82 deletions(-) diff --git a/src/pkg/strconv/atof.go b/src/pkg/strconv/atof.go index cd3031b0e6..b43fab4f07 100644 --- a/src/pkg/strconv/atof.go +++ b/src/pkg/strconv/atof.go @@ -37,17 +37,28 @@ func equalIgnoreCase(s1, s2 string) bool { } func special(s string) (f float64, ok bool) { - switch { - case equalIgnoreCase(s, "nan"): - return math.NaN(), true - case equalIgnoreCase(s, "-inf"), - equalIgnoreCase(s, "-infinity"): - return math.Inf(-1), true - case equalIgnoreCase(s, "+inf"), - equalIgnoreCase(s, "+infinity"), - equalIgnoreCase(s, "inf"), - equalIgnoreCase(s, "infinity"): - return math.Inf(1), true + if len(s) == 0 { + return + } + switch s[0] { + default: + return + case '+': + if equalIgnoreCase(s, "+inf") || equalIgnoreCase(s, "+infinity") { + return math.Inf(1), true + } + case '-': + if equalIgnoreCase(s, "-inf") || equalIgnoreCase(s, "-infinity") { + return math.Inf(-1), true + } + case 'n', 'N': + if equalIgnoreCase(s, "nan") { + return math.NaN(), true + } + case 'i', 'I': + if equalIgnoreCase(s, "inf") || equalIgnoreCase(s, "infinity") { + return math.Inf(1), true + } } return } @@ -142,6 +153,105 @@ func (b *decimal) set(s string) (ok bool) { return } +// readFloat reads a decimal mantissa and exponent from a float +// string representation. It sets ok to false if the number could +// not fit return types or is invalid. +func readFloat(s string) (mantissa uint64, exp int, neg, trunc, ok bool) { + const uint64digits = 19 + i := 0 + + // optional sign + if i >= len(s) { + return + } + switch { + case s[i] == '+': + i++ + case s[i] == '-': + neg = true + i++ + } + + // digits + sawdot := false + sawdigits := false + nd := 0 + ndMant := 0 + dp := 0 + for ; i < len(s); i++ { + switch c := s[i]; true { + case c == '.': + if sawdot { + return + } + sawdot = true + dp = nd + continue + + case '0' <= c && c <= '9': + sawdigits = true + if c == '0' && nd == 0 { // ignore leading zeros + dp-- + continue + } + nd++ + if ndMant < uint64digits { + mantissa *= 10 + mantissa += uint64(c - '0') + ndMant++ + } else if s[i] != '0' { + trunc = true + } + continue + } + break + } + if !sawdigits { + return + } + if !sawdot { + dp = nd + } + + // optional exponent moves decimal point. + // if we read a very large, very long number, + // just be sure to move the decimal point by + // a lot (say, 100000). it doesn't matter if it's + // not the exact number. + if i < len(s) && (s[i] == 'e' || s[i] == 'E') { + i++ + if i >= len(s) { + return + } + esign := 1 + if s[i] == '+' { + i++ + } else if s[i] == '-' { + i++ + esign = -1 + } + if i >= len(s) || s[i] < '0' || s[i] > '9' { + return + } + e := 0 + for ; i < len(s) && '0' <= s[i] && s[i] <= '9'; i++ { + if e < 10000 { + e = e*10 + int(s[i]) - '0' + } + } + dp += e * esign + } + + if i != len(s) { + return + } + + exp = dp - ndMant + ok = true + return + +} + // decimal power of ten to binary power of two. var powtab = []int{1, 3, 6, 9, 13, 16, 19, 23, 26} @@ -243,19 +353,6 @@ out: return bits, overflow } -// Compute exact floating-point integer from d's digits. -// Caller is responsible for avoiding overflow. -func (d *decimal) atof64int() float64 { - f := 0.0 - for i := 0; i < d.nd; i++ { - f = f*10 + float64(d.d[i]-'0') - } - if d.neg { - f = -f - } - return f -} - func (d *decimal) atof32int() float32 { f := float32(0) for i := 0; i < d.nd; i++ { @@ -267,18 +364,6 @@ func (d *decimal) atof32int() float32 { return f } -// Reads a uint64 decimal mantissa, which might be truncated. -func (d *decimal) atou64() (mant uint64, digits int) { - const uint64digits = 19 - for i, c := range d.d[:d.nd] { - if i == uint64digits { - return mant, i - } - mant = 10*mant + uint64(c-'0') - } - return mant, d.nd -} - // Exact powers of 10. var float64pow10 = []float64{ 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, @@ -287,38 +372,41 @@ var float64pow10 = []float64{ } var float32pow10 = []float32{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10} -// If possible to convert decimal d to 64-bit float f exactly, +// If possible to convert decimal representation to 64-bit float f exactly, // entirely in floating-point math, do so, avoiding the expense of decimalToFloatBits. // Three common cases: // value is exact integer // value is exact integer * exact power of ten // value is exact integer / exact power of ten // These all produce potentially inexact but correctly rounded answers. -func (d *decimal) atof64() (f float64, ok bool) { - // Exact integers are <= 10^15. - // Exact powers of ten are <= 10^22. - if d.nd > 15 { +func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) { + if mantissa>>float64info.mantbits != 0 { return } + f = float64(mantissa) + if neg { + f = -f + } switch { - case d.dp == d.nd: // int - f := d.atof64int() + case exp == 0: + // an integer. return f, true - - case d.dp > d.nd && d.dp <= 15+22: // int * 10^k - f := d.atof64int() - k := d.dp - d.nd + // Exact integers are <= 10^15. + // Exact powers of ten are <= 10^22. + case exp > 0 && exp <= 15+22: // int * 10^k // If exponent is big but number of digits is not, // can move a few zeros into the integer part. - if k > 22 { - f *= float64pow10[k-22] - k = 22 + if exp > 22 { + f *= float64pow10[exp-22] + exp = 22 } - return f * float64pow10[k], true - - case d.dp < d.nd && d.nd-d.dp <= 22: // int / 10^k - f := d.atof64int() - return f / float64pow10[d.nd-d.dp], true + if f > 1e15 || f < -1e15 { + // the exponent was really too large. + return + } + return f * float64pow10[exp], true + case exp < 0 && exp >= -22: // int / 10^k + return f / float64pow10[-exp], true } return } @@ -383,26 +471,32 @@ func atof64(s string) (f float64, err error) { return val, nil } - var d decimal - if !d.set(s) { - return 0, syntaxError(fnParseFloat, s) - } if optimize { - if f, ok := d.atof64(); ok { - return f, nil - } - - // Try another fast path. - ext := new(extFloat) - if ok := ext.AssignDecimal(&d); ok { - b, ovf := ext.floatBits() - f = math.Float64frombits(b) - if ovf { - err = rangeError(fnParseFloat, s) + // Parse mantissa and exponent. + mantissa, exp, neg, trunc, ok := readFloat(s) + if ok { + // Try pure floating-point arithmetic conversion. + if !trunc { + if f, ok := atof64exact(mantissa, exp, neg); ok { + return f, nil + } + } + // Try another fast path. + ext := new(extFloat) + if ok := ext.AssignDecimal(mantissa, exp, neg, trunc); ok { + b, ovf := ext.floatBits() + f = math.Float64frombits(b) + if ovf { + err = rangeError(fnParseFloat, s) + } + return f, err } - return f, err } } + var d decimal + if !d.set(s) { + return 0, syntaxError(fnParseFloat, s) + } b, ovf := d.floatBits(&float64info) f = math.Float64frombits(b) if ovf { diff --git a/src/pkg/strconv/extfloat.go b/src/pkg/strconv/extfloat.go index aa5e5607ca..7ba4785bd3 100644 --- a/src/pkg/strconv/extfloat.go +++ b/src/pkg/strconv/extfloat.go @@ -264,24 +264,21 @@ var uint64pow10 = [...]uint64{ 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, } -// AssignDecimal sets f to an approximate value of the decimal d. It +// AssignDecimal sets f to an approximate value mantissa*10^exp. It // returns true if the value represented by f is guaranteed to be the // best approximation of d after being rounded to a float64. -func (f *extFloat) AssignDecimal(d *decimal) (ok bool) { +func (f *extFloat) AssignDecimal(mantissa uint64, exp10 int, neg bool, trunc bool) (ok bool) { const uint64digits = 19 const errorscale = 8 - mant10, digits := d.atou64() - exp10 := d.dp - digits errors := 0 // An upper bound for error, computed in errorscale*ulp. - - if digits < d.nd { + if trunc { // the decimal number was truncated. errors += errorscale / 2 } - f.mant = mant10 + f.mant = mantissa f.exp = 0 - f.neg = d.neg + f.neg = neg // Multiply by powers of ten. i := (exp10 - firstPowerOfTen) / stepPowerOfTen @@ -291,9 +288,9 @@ func (f *extFloat) AssignDecimal(d *decimal) (ok bool) { adjExp := (exp10 - firstPowerOfTen) % stepPowerOfTen // We multiply by exp%step - if digits+adjExp <= uint64digits { - // We can multiply the mantissa - f.mant *= uint64(float64pow10[adjExp]) + if adjExp < uint64digits && mantissa < uint64pow10[uint64digits-adjExp] { + // We can multiply the mantissa exactly. + f.mant *= uint64pow10[adjExp] f.Normalize() } else { f.Normalize() diff --git a/src/pkg/strconv/strconv_test.go b/src/pkg/strconv/strconv_test.go index f6707ba87b..5cab4bf42b 100644 --- a/src/pkg/strconv/strconv_test.go +++ b/src/pkg/strconv/strconv_test.go @@ -7,11 +7,13 @@ package strconv_test import ( "runtime" . "strconv" + "strings" "testing" ) var ( globalBuf [64]byte + nextToOne = "1.00000000000000011102230246251565404236316680908203125" + strings.Repeat("0", 10000) + "1" mallocTest = []struct { count int @@ -30,6 +32,14 @@ var ( AppendFloat(localBuf[:0], 1.23, 'g', 5, 64) }}, {0, `AppendFloat(globalBuf[:0], 1.23, 'g', 5, 64)`, func() { AppendFloat(globalBuf[:0], 1.23, 'g', 5, 64) }}, + {0, `ParseFloat("123.45", 64)`, func() { ParseFloat("123.45", 64) }}, + {0, `ParseFloat("123.456789123456789", 64)`, func() { ParseFloat("123.456789123456789", 64) }}, + {0, `ParseFloat("1.000000000000000111022302462515654042363166809082031251", 64)`, func() { + ParseFloat("1.000000000000000111022302462515654042363166809082031251", 64) + }}, + {0, `ParseFloat("1.0000000000000001110223024625156540423631668090820312500...001", 64)`, func() { + ParseFloat(nextToOne, 64) + }}, } ) -- 2.48.1