math/big: fix Float.Float64 conversion for denormal corner cases

author Robert Griesemer <gri@golang.org>

Fri, 22 May 2015 01:00:54 +0000 (18:00 -0700)

committer Robert Griesemer <gri@golang.org>

Fri, 22 May 2015 21:10:43 +0000 (21:10 +0000)
author Robert Griesemer <gri@golang.org>
Fri, 22 May 2015 01:00:54 +0000 (18:00 -0700)
committer Robert Griesemer <gri@golang.org>
Fri, 22 May 2015 21:10:43 +0000 (21:10 +0000)
diff --git a/src/math/big/float.go b/src/math/big/float.go

index b666b9dd3814c27475e0aba7e6dd29c39817c942..dcb72c575408f264e276165bfa3e953cb36215d4 100644 (file)
--- a/src/math/big/float.go
+++ b/src/math/big/float.go
@@ -959,10 +959,6 @@ func (x *Float) Float32() (float32, Accuracy) {
         panic("unreachable")
  }
  
-// TODO(gri) Use same algorithm for Float64 as for Float32. The Float64 code
-// is incorrect for some corner cases (numbers that get rounded up to smallest
-// denormal - test cases are missing).
-
  // Float64 returns the float64 value nearest to x. If x is too small to be
  // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result
  // is (0, Below) or (-0, Above), respectively, depending on the sign of x.
@@ -987,64 +983,70 @@ func (x *Float) Float64() (float64, Accuracy) {
                         emax  = bias              //  1023  largest unbiased exponent (normal)
                 )
  
-               // Float mantissae m have an explicit msb and are in the range 0.5 <= m < 1.0.
-               // floatxx mantissae have an implicit msb and are in the range 1.0 <= m < 2.0.
-               // For a given mantissa m, we need to add 1 to a floatxx exponent to get the
-               // corresponding Float exponent.
-               // (see also implementation of math.Ldexp for similar code)
+               // Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa.
+               e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0
+               p := mbits + 1 // precision of normal float
  
-               if x.exp < dmin+1 {
-                       // underflow
-                       if x.neg {
-                               var z float64
-                               return -z, Above
+               // If the exponent is too small, we may have a denormal number
+               // in which case we have fewer mantissa bits available: reduce
+               // precision accordingly.
+               if e < emin {
+                       p -= emin - int(e)
+                       // Make sure we have at least 1 bit so that we don't
+                       // lose numbers rounded up to the smallest denormal.
+                       if p < 1 {
+                               p = 1
                         }
-                       return 0.0, Below
                 }
-               // x.exp >= dmin+1
  
+               // round
                 var r Float
-               r.prec = mbits + 1 // +1 for implicit msb
-               if x.exp < emin+1 {
-                       // denormal number - round to fewer bits
-                       r.prec = uint32(x.exp - dmin)
-               }
+               r.prec = uint32(p)
                 r.Set(x)
+               e = r.exp - 1
  
                 // Rounding may have caused r to overflow to ±Inf
                 // (rounding never causes underflows to 0).
                 if r.form == inf {
-                       r.exp = emax + 2 // cause overflow below
+                       e = emax + 1 // cause overflow below
                 }
  
-               if r.exp > emax+1 {
+               // If the exponent is too large, overflow to ±Inf.
+               if e > emax {
                         // overflow
                         if x.neg {
                                 return math.Inf(-1), Below
                         }
                         return math.Inf(+1), Above
                 }
-               // dmin+1 <= r.exp <= emax+1
  
-               var s uint64
-               if r.neg {
-                       s = 1 << (fbits - 1)
+               // Determine sign, biased exponent, and mantissa.
+               var sign, bexp, mant uint64
+               if x.neg {
+                       sign = 1 << (fbits - 1)
                 }
  
-               m := high64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
-
                 // Rounding may have caused a denormal number to
                 // become normal. Check again.
-               c := 1.0
-               if r.exp < emin+1 {
+               if e < emin {
                         // denormal number
-                       r.exp += mbits
-                       c = 1.0 / (1 << mbits) // 2**-mbits
+                       if e < dmin {
+                               // underflow to ±0
+                               if x.neg {
+                                       var z float64
+                                       return -z, Above
+                               }
+                               return 0.0, Below
+                       }
+                       // bexp = 0
+                       mant = high64(r.mant) >> (fbits - r.prec)
+               } else {
+                       // normal number: emin <= e <= emax
+                       bexp = uint64(e+bias) << mbits
+                       mant = high64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
                 }
-               // emin+1 <= r.exp <= emax+1
-               e := uint64(r.exp-emin) << mbits
  
-               return c * math.Float64frombits(s|e|m), r.acc
+               return math.Float64frombits(sign | bexp | mant), r.acc
  
         case zero:
                 if x.neg {
diff --git a/src/math/big/float_test.go b/src/math/big/float_test.go

index 92eec71c5fc2c4dca94bc3211697046a8fe9ffe1..8bd3a9c8c965324cdb88cbfb8f835f1b80a3def6 100644 (file)
--- a/src/math/big/float_test.go
+++ b/src/math/big/float_test.go
@@ -922,9 +922,8 @@ func TestFloatFloat64(t *testing.T) {
                 {"0x0.00000000000008p-1022", 0, Below},
  
                 // denormals
-               // TODO(gri) enable once Float64 is fixed
-               // {"0x0.0000000000000cp-1022", math.SmallestNonzeroFloat64, Above}, // rounded up to smallest denormal
-               {"0x0.0000000000001p-1022", math.SmallestNonzeroFloat64, Exact}, // smallest denormal
+               {"0x0.0000000000000cp-1022", math.SmallestNonzeroFloat64, Above}, // rounded up to smallest denormal
+               {"0x0.0000000000001p-1022", math.SmallestNonzeroFloat64, Exact},  // smallest denormal
                 {"0x.8p-1073", math.SmallestNonzeroFloat64, Exact},
                 {"1p-1074", math.SmallestNonzeroFloat64, Exact},
                 {"0x.fffffffffffffp-1022", math.Float64frombits(0x000fffffffffffff), Exact}, // largest denormal
author	Robert Griesemer <gri@golang.org>
	Fri, 22 May 2015 01:00:54 +0000 (18:00 -0700)
committer	Robert Griesemer <gri@golang.org>
	Fri, 22 May 2015 21:10:43 +0000 (21:10 +0000)
src/math/big/float.go		patch \| blob \| history
src/math/big/float_test.go		patch \| blob \| history