strconv: 2x-4x speed improvement for atof64.

author Rémy Oudompheng <oudomphe@phare.normalesup.org>

Sat, 21 Apr 2012 11:56:51 +0000 (13:56 +0200)

committer Rémy Oudompheng <oudomphe@phare.normalesup.org>

Sat, 21 Apr 2012 11:56:51 +0000 (13:56 +0200)
author Rémy Oudompheng <oudomphe@phare.normalesup.org>
Sat, 21 Apr 2012 11:56:51 +0000 (13:56 +0200)
committer Rémy Oudompheng <oudomphe@phare.normalesup.org>
Sat, 21 Apr 2012 11:56:51 +0000 (13:56 +0200)
diff --git a/src/pkg/strconv/atof.go b/src/pkg/strconv/atof.go

index cd3031b0e61b86d30998389534e3ff82746a2460..b43fab4f0760df29dd4ed79cee615cc8d47b4cb6 100644 (file)
--- a/src/pkg/strconv/atof.go
+++ b/src/pkg/strconv/atof.go
@@ -37,17 +37,28 @@ func equalIgnoreCase(s1, s2 string) bool {
  }
  
  func special(s string) (f float64, ok bool) {
-       switch {
-       case equalIgnoreCase(s, "nan"):
-               return math.NaN(), true
-       case equalIgnoreCase(s, "-inf"),
-               equalIgnoreCase(s, "-infinity"):
-               return math.Inf(-1), true
-       case equalIgnoreCase(s, "+inf"),
-               equalIgnoreCase(s, "+infinity"),
-               equalIgnoreCase(s, "inf"),
-               equalIgnoreCase(s, "infinity"):
-               return math.Inf(1), true
+       if len(s) == 0 {
+               return
+       }
+       switch s[0] {
+       default:
+               return
+       case '+':
+               if equalIgnoreCase(s, "+inf") || equalIgnoreCase(s, "+infinity") {
+                       return math.Inf(1), true
+               }
+       case '-':
+               if equalIgnoreCase(s, "-inf") || equalIgnoreCase(s, "-infinity") {
+                       return math.Inf(-1), true
+               }
+       case 'n', 'N':
+               if equalIgnoreCase(s, "nan") {
+                       return math.NaN(), true
+               }
+       case 'i', 'I':
+               if equalIgnoreCase(s, "inf") || equalIgnoreCase(s, "infinity") {
+                       return math.Inf(1), true
+               }
         }
         return
  }
@@ -142,6 +153,105 @@ func (b *decimal) set(s string) (ok bool) {
         return
  }
  
+// readFloat reads a decimal mantissa and exponent from a float
+// string representation. It sets ok to false if the number could
+// not fit return types or is invalid.
+func readFloat(s string) (mantissa uint64, exp int, neg, trunc, ok bool) {
+       const uint64digits = 19
+       i := 0
+
+       // optional sign
+       if i >= len(s) {
+               return
+       }
+       switch {
+       case s[i] == '+':
+               i++
+       case s[i] == '-':
+               neg = true
+               i++
+       }
+
+       // digits
+       sawdot := false
+       sawdigits := false
+       nd := 0
+       ndMant := 0
+       dp := 0
+       for ; i < len(s); i++ {
+               switch c := s[i]; true {
+               case c == '.':
+                       if sawdot {
+                               return
+                       }
+                       sawdot = true
+                       dp = nd
+                       continue
+
+               case '0' <= c && c <= '9':
+                       sawdigits = true
+                       if c == '0' && nd == 0 { // ignore leading zeros
+                               dp--
+                               continue
+                       }
+                       nd++
+                       if ndMant < uint64digits {
+                               mantissa *= 10
+                               mantissa += uint64(c - '0')
+                               ndMant++
+                       } else if s[i] != '0' {
+                               trunc = true
+                       }
+                       continue
+               }
+               break
+       }
+       if !sawdigits {
+               return
+       }
+       if !sawdot {
+               dp = nd
+       }
+
+       // optional exponent moves decimal point.
+       // if we read a very large, very long number,
+       // just be sure to move the decimal point by
+       // a lot (say, 100000).  it doesn't matter if it's
+       // not the exact number.
+       if i < len(s) && (s[i] == 'e' || s[i] == 'E') {
+               i++
+               if i >= len(s) {
+                       return
+               }
+               esign := 1
+               if s[i] == '+' {
+                       i++
+               } else if s[i] == '-' {
+                       i++
+                       esign = -1
+               }
+               if i >= len(s) || s[i] < '0' || s[i] > '9' {
+                       return
+               }
+               e := 0
+               for ; i < len(s) && '0' <= s[i] && s[i] <= '9'; i++ {
+                       if e < 10000 {
+                               e = e*10 + int(s[i]) - '0'
+                       }
+               }
+               dp += e * esign
+       }
+
+       if i != len(s) {
+               return
+       }
+
+       exp = dp - ndMant
+       ok = true
+       return
+
+}
+
  // decimal power of ten to binary power of two.
  var powtab = []int{1, 3, 6, 9, 13, 16, 19, 23, 26}
  
@@ -243,19 +353,6 @@ out:
         return bits, overflow
  }
  
-// Compute exact floating-point integer from d's digits.
-// Caller is responsible for avoiding overflow.
-func (d *decimal) atof64int() float64 {
-       f := 0.0
-       for i := 0; i < d.nd; i++ {
-               f = f*10 + float64(d.d[i]-'0')
-       }
-       if d.neg {
-               f = -f
-       }
-       return f
-}
-
  func (d *decimal) atof32int() float32 {
         f := float32(0)
         for i := 0; i < d.nd; i++ {
@@ -267,18 +364,6 @@ func (d *decimal) atof32int() float32 {
         return f
  }
  
-// Reads a uint64 decimal mantissa, which might be truncated.
-func (d *decimal) atou64() (mant uint64, digits int) {
-       const uint64digits = 19
-       for i, c := range d.d[:d.nd] {
-               if i == uint64digits {
-                       return mant, i
-               }
-               mant = 10*mant + uint64(c-'0')
-       }
-       return mant, d.nd
-}
-
  // Exact powers of 10.
  var float64pow10 = []float64{
         1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
@@ -287,38 +372,41 @@ var float64pow10 = []float64{
  }
  var float32pow10 = []float32{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10}
  
-// If possible to convert decimal d to 64-bit float f exactly,
+// If possible to convert decimal representation to 64-bit float f exactly,
  // entirely in floating-point math, do so, avoiding the expense of decimalToFloatBits.
  // Three common cases:
  //     value is exact integer
  //     value is exact integer * exact power of ten
  //     value is exact integer / exact power of ten
  // These all produce potentially inexact but correctly rounded answers.
-func (d *decimal) atof64() (f float64, ok bool) {
-       // Exact integers are <= 10^15.
-       // Exact powers of ten are <= 10^22.
-       if d.nd > 15 {
+func atof64exact(mantissa uint64, exp int, neg bool) (f float64, ok bool) {
+       if mantissa>>float64info.mantbits != 0 {
                 return
         }
+       f = float64(mantissa)
+       if neg {
+               f = -f
+       }
         switch {
-       case d.dp == d.nd: // int
-               f := d.atof64int()
+       case exp == 0:
+               // an integer.
                 return f, true
-
-       case d.dp > d.nd && d.dp <= 15+22: // int * 10^k
-               f := d.atof64int()
-               k := d.dp - d.nd
+       // Exact integers are <= 10^15.
+       // Exact powers of ten are <= 10^22.
+       case exp > 0 && exp <= 15+22: // int * 10^k
                 // If exponent is big but number of digits is not,
                 // can move a few zeros into the integer part.
-               if k > 22 {
-                       f *= float64pow10[k-22]
-                       k = 22
+               if exp > 22 {
+                       f *= float64pow10[exp-22]
+                       exp = 22
                 }
-               return f * float64pow10[k], true
-
-       case d.dp < d.nd && d.nd-d.dp <= 22: // int / 10^k
-               f := d.atof64int()
-               return f / float64pow10[d.nd-d.dp], true
+               if f > 1e15 || f < -1e15 {
+                       // the exponent was really too large.
+                       return
+               }
+               return f * float64pow10[exp], true
+       case exp < 0 && exp >= -22: // int / 10^k
+               return f / float64pow10[-exp], true
         }
         return
  }
@@ -383,26 +471,32 @@ func atof64(s string) (f float64, err error) {
                 return val, nil
         }
  
-       var d decimal
-       if !d.set(s) {
-               return 0, syntaxError(fnParseFloat, s)
-       }
         if optimize {
-               if f, ok := d.atof64(); ok {
-                       return f, nil
-               }
-
-               // Try another fast path.
-               ext := new(extFloat)
-               if ok := ext.AssignDecimal(&d); ok {
-                       b, ovf := ext.floatBits()
-                       f = math.Float64frombits(b)
-                       if ovf {
-                               err = rangeError(fnParseFloat, s)
+               // Parse mantissa and exponent.
+               mantissa, exp, neg, trunc, ok := readFloat(s)
+               if ok {
+                       // Try pure floating-point arithmetic conversion.
+                       if !trunc {
+                               if f, ok := atof64exact(mantissa, exp, neg); ok {
+                                       return f, nil
+                               }
+                       }
+                       // Try another fast path.
+                       ext := new(extFloat)
+                       if ok := ext.AssignDecimal(mantissa, exp, neg, trunc); ok {
+                               b, ovf := ext.floatBits()
+                               f = math.Float64frombits(b)
+                               if ovf {
+                                       err = rangeError(fnParseFloat, s)
+                               }
+                               return f, err
                         }
-                       return f, err
                 }
         }
+       var d decimal
+       if !d.set(s) {
+               return 0, syntaxError(fnParseFloat, s)
+       }
         b, ovf := d.floatBits(&float64info)
         f = math.Float64frombits(b)
         if ovf {
diff --git a/src/pkg/strconv/extfloat.go b/src/pkg/strconv/extfloat.go

index aa5e5607ca0b94c795ddeaff0e21f7c9faa76160..7ba4785bd33153e731868a44ca509d1c225b7ab9 100644 (file)
--- a/src/pkg/strconv/extfloat.go
+++ b/src/pkg/strconv/extfloat.go
@@ -264,24 +264,21 @@ var uint64pow10 = [...]uint64{
         1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
  }
  
-// AssignDecimal sets f to an approximate value of the decimal d. It
+// AssignDecimal sets f to an approximate value mantissa*10^exp. It
  // returns true if the value represented by f is guaranteed to be the
  // best approximation of d after being rounded to a float64. 
-func (f *extFloat) AssignDecimal(d *decimal) (ok bool) {
+func (f *extFloat) AssignDecimal(mantissa uint64, exp10 int, neg bool, trunc bool) (ok bool) {
         const uint64digits = 19
         const errorscale = 8
-       mant10, digits := d.atou64()
-       exp10 := d.dp - digits
         errors := 0 // An upper bound for error, computed in errorscale*ulp.
-
-       if digits < d.nd {
+       if trunc {
                 // the decimal number was truncated.
                 errors += errorscale / 2
         }
  
-       f.mant = mant10
+       f.mant = mantissa
         f.exp = 0
-       f.neg = d.neg
+       f.neg = neg
  
         // Multiply by powers of ten.
         i := (exp10 - firstPowerOfTen) / stepPowerOfTen
@@ -291,9 +288,9 @@ func (f *extFloat) AssignDecimal(d *decimal) (ok bool) {
         adjExp := (exp10 - firstPowerOfTen) % stepPowerOfTen
  
         // We multiply by exp%step
-       if digits+adjExp <= uint64digits {
-               // We can multiply the mantissa
-               f.mant *= uint64(float64pow10[adjExp])
+       if adjExp < uint64digits && mantissa < uint64pow10[uint64digits-adjExp] {
+               // We can multiply the mantissa exactly.
+               f.mant *= uint64pow10[adjExp]
                 f.Normalize()
         } else {
                 f.Normalize()
diff --git a/src/pkg/strconv/strconv_test.go b/src/pkg/strconv/strconv_test.go

index f6707ba87b58dfef4d42fbd7f76b32c17b752337..5cab4bf42b8fcec26391ca34904cb2785d7bb184 100644 (file)
--- a/src/pkg/strconv/strconv_test.go
+++ b/src/pkg/strconv/strconv_test.go
@@ -7,11 +7,13 @@ package strconv_test
  import (
         "runtime"
         . "strconv"
+       "strings"
         "testing"
  )
  
  var (
         globalBuf [64]byte
+       nextToOne = "1.00000000000000011102230246251565404236316680908203125" + strings.Repeat("0", 10000) + "1"
  
         mallocTest = []struct {
                 count int
@@ -30,6 +32,14 @@ var (
                         AppendFloat(localBuf[:0], 1.23, 'g', 5, 64)
                 }},
                 {0, `AppendFloat(globalBuf[:0], 1.23, 'g', 5, 64)`, func() { AppendFloat(globalBuf[:0], 1.23, 'g', 5, 64) }},
+               {0, `ParseFloat("123.45", 64)`, func() { ParseFloat("123.45", 64) }},
+               {0, `ParseFloat("123.456789123456789", 64)`, func() { ParseFloat("123.456789123456789", 64) }},
+               {0, `ParseFloat("1.000000000000000111022302462515654042363166809082031251", 64)`, func() {
+                       ParseFloat("1.000000000000000111022302462515654042363166809082031251", 64)
+               }},
+               {0, `ParseFloat("1.0000000000000001110223024625156540423631668090820312500...001", 64)`, func() {
+                       ParseFloat(nextToOne, 64)
+               }},
         }
  )
author	Rémy Oudompheng <oudomphe@phare.normalesup.org>
	Sat, 21 Apr 2012 11:56:51 +0000 (13:56 +0200)
committer	Rémy Oudompheng <oudomphe@phare.normalesup.org>
	Sat, 21 Apr 2012 11:56:51 +0000 (13:56 +0200)
src/pkg/strconv/atof.go		patch \| blob \| history
src/pkg/strconv/extfloat.go		patch \| blob \| history
src/pkg/strconv/strconv_test.go		patch \| blob \| history