math/big: fix known bug in Float.Float64

author Robert Griesemer <gri@golang.org>

Tue, 24 Mar 2015 00:31:25 +0000 (17:31 -0700)

committer Robert Griesemer <gri@golang.org>

Tue, 24 Mar 2015 20:34:14 +0000 (20:34 +0000)
author Robert Griesemer <gri@golang.org>
Tue, 24 Mar 2015 00:31:25 +0000 (17:31 -0700)
committer Robert Griesemer <gri@golang.org>
Tue, 24 Mar 2015 20:34:14 +0000 (20:34 +0000)
diff --git a/src/math/big/float.go b/src/math/big/float.go

index a86471e2a51e1847b47093cfb24081bf332e18df..fa3751d0c7ea583395c4bf44485a77d581099089 100644 (file)
--- a/src/math/big/float.go
+++ b/src/math/big/float.go
@@ -872,9 +872,14 @@ func (x *Float) Int64() (int64, Accuracy) {
         panic("unreachable")
  }
  
-// Float64 returns the closest float64 value of x
-// by rounding to nearest with 53 bits precision.
-// BUG(gri) Float.Float64 doesn't handle exponent overflow.
+// Float64 returns the float64 value nearest to x by rounding ToNearestEven
+// with 53 bits of precision.
+// If x is too small to be represented by a float64
+// (|x| < math.SmallestNonzeroFloat64), the result is (0, Below) or
+// (-0, Above), respectively, depending on the sign of x.
+// If x is too large to be represented by a float64 (|x| > math.MaxFloat64),
+// the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
+// The result is (NaN, Undef) for NaNs.
  func (x *Float) Float64() (float64, Accuracy) {
         if debugFloat {
                 x.validate()
@@ -886,27 +891,67 @@ func (x *Float) Float64() (float64, Accuracy) {
                 var r Float
                 r.prec = 53
                 r.Set(x)
-               var s uint64
+
+               // Rounding via Set may have caused r to overflow
+               // to ±Inf (rounding never causes underflows to 0).
+               if r.form == inf {
+                       r.exp = 10000 // cause overflow below
+               }
+
+               // see also implementation of math.Ldexp
+
+               e := int64(r.exp) + 1022
+               if e <= -52 {
+                       // underflow
+                       if x.neg {
+                               z := 0.0
+                               return -z, Above
+                       }
+                       return 0.0, Below
+               }
+               // e > -52
+
+               if e >= 2047 {
+                       // overflow
+                       if x.neg {
+                               return math.Inf(-1), Below
+                       }
+                       return math.Inf(+1), Above
+               }
+               // -52 < e < 2047
+
+               denormal := false
+               if e < 0 {
+                       denormal = true
+                       e += 52
+               }
+               // 0 < e < 2047
+
+               s := uint64(0)
                 if r.neg {
                         s = 1 << 63
                 }
-               e := uint64(1022+r.exp) & 0x7ff // TODO(gri) check for overflow
-               m := high64(r.mant) >> 11 & (1<<52 - 1)
-               return math.Float64frombits(s | e<<52 | m), r.acc
+               m := high64(r.mant) >> 11 & (1<<52 - 1) // cut off msb (implicit 1 bit)
+               z := math.Float64frombits(s | uint64(e)<<52 | m)
+               if denormal {
+                       // adjust for denormal
+                       // TODO(gri) does this change accuracy?
+                       z /= 1 << 52
+               }
+               return z, r.acc
  
         case zero:
-               z := 0.0
                 if x.neg {
-                       z = -z
+                       z := 0.0
+                       return -z, Exact
                 }
-               return z, Exact
+               return 0.0, Exact
  
         case inf:
-               sign := +1
                 if x.neg {
-                       sign = -1
+                       return math.Inf(-1), Exact
                 }
-               return math.Inf(sign), Exact
+               return math.Inf(+1), Exact
  
         case nan:
                 return math.NaN(), Undef
diff --git a/src/math/big/float_test.go b/src/math/big/float_test.go

index 379352c886d62732173527cd7d7aa006e184d4a5..7bfac5d66b06d3219d3972728258e5f9ca85d6b0 100644 (file)
--- a/src/math/big/float_test.go
+++ b/src/math/big/float_test.go
@@ -627,6 +627,10 @@ func TestFloatSetFloat64(t *testing.T) {
                 3.14159265e10,
                 2.718281828e-123,
                 1.0 / 3,
+               math.MaxFloat32,
+               math.MaxFloat64,
+               math.SmallestNonzeroFloat32,
+               math.SmallestNonzeroFloat64,
                 math.Inf(-1),
                 math.Inf(0),
                 -math.Inf(1),
@@ -637,8 +641,8 @@ func TestFloatSetFloat64(t *testing.T) {
                         }
                         var f Float
                         f.SetFloat64(want)
-                       if got, _ := f.Float64(); got != want {
-                               t.Errorf("got %g (%s); want %g", got, f.Format('p', 0), want)
+                       if got, acc := f.Float64(); got != want || acc != Exact {
+                               t.Errorf("got %g (%s, %s); want %g (Exact)", got, f.Format('p', 0), acc, want)
                         }
                 }
         }
@@ -833,6 +837,56 @@ func TestFloatInt64(t *testing.T) {
         }
  }
  
+// TestFloatFloat64 checks Float.Float64 against a table of inputs ordered
+// from -Inf to +Inf. The table covers overflow, underflow, and rounding near
+// the limits of the float64 range; NaN is checked separately at the end
+// because NaN never compares equal to itself.
+func TestFloatFloat64(t *testing.T) {
+       // The untyped constant -0 is just 0 and converts to +0.0, so a
+       // negative zero has to be constructed explicitly.
+       negZero := math.Copysign(0, -1)
+
+       for _, test := range []struct {
+               x   string
+               out float64
+               acc Accuracy
+       }{
+               {"-Inf", math.Inf(-1), Exact},
+               {"-0x1.fffffffffffff8p2147483646", -math.Inf(+1), Below}, // overflow in rounding
+               {"-1e10000", math.Inf(-1), Below},                        // overflow
+               {"-0x1p1024", math.Inf(-1), Below},                       // overflow
+               {"-0x1.fffffffffffff8p1023", -math.Inf(+1), Below},       // overflow
+               {"-0x1.fffffffffffff4p1023", -math.MaxFloat64, Above},
+               {"-0x1.fffffffffffffp1023", -math.MaxFloat64, Exact},
+               {"-12345.000000000000000000001", -12345, Above},
+               {"-12345.0", -12345, Exact},
+               {"-1.000000000000000000001", -1, Above},
+               {"-1", -1, Exact},
+               {"-0x0.0000000000001p-1022", -math.SmallestNonzeroFloat64, Exact},
+               {"-0x0.0000000000001p-1023", negZero, Above}, // underflow
+               {"-1e-1000", negZero, Above},                 // underflow
+               {"0", 0, Exact},
+               {"1e-1000", 0, Below},                 // underflow
+               {"0x0.0000000000001p-1023", 0, Below}, // underflow
+               {"0x0.0000000000001p-1022", math.SmallestNonzeroFloat64, Exact},
+               {"1", 1, Exact},
+               {"1.000000000000000000001", 1, Below},
+               {"12345.0", 12345, Exact},
+               {"12345.000000000000000000001", 12345, Below},
+               {"0x1.fffffffffffffp1023", math.MaxFloat64, Exact},
+               {"0x1.fffffffffffff4p1023", math.MaxFloat64, Below},
+               {"0x1.fffffffffffff8p1023", math.Inf(+1), Above},       // overflow
+               {"0x1p1024", math.Inf(+1), Above},                      // overflow
+               {"1e10000", math.Inf(+1), Above},                       // overflow
+               {"0x1.fffffffffffff8p2147483646", math.Inf(+1), Above}, // overflow in rounding
+               {"+Inf", math.Inf(+1), Exact},
+       } {
+               x := makeFloat(test.x)
+               out, acc := x.Float64()
+               // == treats -0 and +0 as equal, so the sign bit is compared as
+               // well; otherwise a wrongly signed zero would go unnoticed.
+               if out != test.out || math.Signbit(out) != math.Signbit(test.out) || acc != test.acc {
+                       t.Errorf("%s: got %g (%s); want %g (%s)", test.x, out, acc, test.out, test.acc)
+               }
+       }
+
+       // test NaN (out == out is false only for NaN)
+       x := makeFloat("NaN")
+       if out, acc := x.Float64(); out == out || acc != Undef {
+               t.Errorf("NaN: got %g (%s); want NaN (Undef)", out, acc)
+       }
+}
+
  func TestFloatInt(t *testing.T) {
         for _, test := range []struct {
                 x    string
@@ -1073,14 +1127,14 @@ func TestFloatAdd32(t *testing.T) {
                         got, acc := z.Float64()
                         want := float64(float32(y0) + float32(x0))
                         if got != want || acc != Exact {
-                               t.Errorf("d = %d: %g + %g = %g (%s); want %g exactly", d, x0, y0, got, acc, want)
+                               t.Errorf("d = %d: %g + %g = %g (%s); want %g (Exact)", d, x0, y0, got, acc, want)
                         }
  
                         z.Sub(z, y)
                         got, acc = z.Float64()
                         want = float64(float32(want) - float32(y0))
                         if got != want || acc != Exact {
-                               t.Errorf("d = %d: %g - %g = %g (%s); want %g exactly", d, x0+y0, y0, got, acc, want)
+                               t.Errorf("d = %d: %g - %g = %g (%s); want %g (Exact)", d, x0+y0, y0, got, acc, want)
                         }
                 }
         }
@@ -1106,14 +1160,14 @@ func TestFloatAdd64(t *testing.T) {
                         got, acc := z.Float64()
                         want := x0 + y0
                         if got != want || acc != Exact {
-                               t.Errorf("d = %d: %g + %g = %g (%s); want %g exactly", d, x0, y0, got, acc, want)
+                               t.Errorf("d = %d: %g + %g = %g (%s); want %g (Exact)", d, x0, y0, got, acc, want)
                         }
  
                         z.Sub(z, y)
                         got, acc = z.Float64()
                         want -= y0
                         if got != want || acc != Exact {
-                               t.Errorf("d = %d: %g - %g = %g (%s); want %g exactly", d, x0+y0, y0, got, acc, want)
+                               t.Errorf("d = %d: %g - %g = %g (%s); want %g (Exact)", d, x0+y0, y0, got, acc, want)
                         }
                 }
         }
author	Robert Griesemer <gri@golang.org>
	Tue, 24 Mar 2015 00:31:25 +0000 (17:31 -0700)
committer	Robert Griesemer <gri@golang.org>
	Tue, 24 Mar 2015 20:34:14 +0000 (20:34 +0000)
src/math/big/float.go		patch | blob | history
src/math/big/float_test.go		patch | blob | history