math/big: handling of +/-Inf and zero precision, enable zero values

author Robert Griesemer <gri@golang.org>

Fri, 6 Feb 2015 01:21:48 +0000 (17:21 -0800)

committer Robert Griesemer <gri@golang.org>

Fri, 6 Feb 2015 17:21:01 +0000 (17:21 +0000)
author Robert Griesemer <gri@golang.org>
Fri, 6 Feb 2015 01:21:48 +0000 (17:21 -0800)
committer Robert Griesemer <gri@golang.org>
Fri, 6 Feb 2015 17:21:01 +0000 (17:21 +0000)
diff --git a/src/math/big/float.go b/src/math/big/float.go

index ea42a9166e9ed2857809fca51b93d0f4acfb1043..44e75cbf395202d283a8eb84bbb4db1da87fbeef 100644 (file)
--- a/src/math/big/float.go
+++ b/src/math/big/float.go
@@ -18,10 +18,6 @@ import (
         "math"
  )
  
-// TODO(gri): Determine if there's a more natural way to set the precision.
-// Should there be a special meaning for prec 0? Such as "full precision"?
-// (would be possible for all ops except quotient).
-
  const debugFloat = true // enable for debugging
  
  // Internal representation: A floating-point value x != 0 consists
@@ -45,14 +41,15 @@ const debugFloat = true // enable for debugging
  //
  //   sign * mantissa * 2**exponent
  //
-// Each value also has a precision, rounding mode, and accuracy value:
-// The precision is the number of mantissa bits used to represent a
-// value, and the result of operations is rounded to that many bits
-// according to the value's rounding mode (unless specified othewise).
+// Each value also has a precision, rounding mode, and accuracy value.
+// The precision is the number of mantissa bits used to represent the
+// value, and the result of an operation is rounded to that many bits
+// according to the value's rounding mode (unless specified otherwise).
  // The accuracy value indicates the rounding error with respect to the
  // exact (not rounded) value.
  //
-// The zero value for a Float represents the number 0.
+// The zero (uninitialized) value for a Float is ready to use and
+// represents the number 0.0 of 0 bit precision.
  //
  // By setting the desired precision to 24 (or 53) and using ToNearestEven
  // rounding, Float arithmetic operations emulate the corresponding float32
@@ -71,14 +68,26 @@ type Float struct {
  
  // NewFloat returns a new Float with value x rounded
  // to prec bits according to the given rounding mode.
+// If prec == 0, the result has value 0.0 independent
+// of the value of x.
+// BUG(gri) For prec == 0 and x == Inf, the result
+// should be Inf as well.
  func NewFloat(x float64, prec uint, mode RoundingMode) *Float {
-       // TODO(gri) should make this more efficient
-       z := new(Float).SetFloat64(x)
-       return z.Round(z, prec, mode)
+       var z Float
+       if prec > 0 {
+               // TODO(gri) should make this more efficient
+               z.SetFloat64(x)
+               return z.Round(&z, prec, mode)
+       }
+       z.mode = mode // TODO(gri) don't do this twice for prec > 0
+       return &z
  }
  
-// infExp is the exponent value for infinity.
-const infExp = 1<<31 - 1
+// Special exponent values.
+const (
+       maxExp = math.MaxInt32
+       infExp = -maxExp - 1 // exponent value for Inf values
+)
  
  // NewInf returns a new Float with value positive infinity (sign >= 0),
  // or negative infinity (sign < 0).
@@ -86,12 +95,16 @@ func NewInf(sign int) *Float {
         return &Float{neg: sign < 0, exp: infExp}
  }
  
+// setExp sets the exponent for z.
+// If the exponent is too small or too large, z becomes +/-Inf.
  func (z *Float) setExp(e int64) {
-       e32 := int32(e)
-       if int64(e32) != e {
-               panic("exponent overflow") // TODO(gri) handle this gracefully
+       if -maxExp <= e && e <= maxExp {
+               z.exp = int32(e)
+               return
         }
-       z.exp = e32
+       // Inf
+       z.mant = z.mant[:0]
+       z.exp = infExp
  }
  
  // Accuracy describes the rounding error produced by the most recent
@@ -155,7 +168,7 @@ func (mode RoundingMode) String() string {
  }
  
  // Precision returns the mantissa precision of x in bits.
-// The precision may be 0 if x == 0. // TODO(gri) Determine a better approach.
+// The precision may be 0 for |x| == 0 or |x| == Inf.
  func (x *Float) Precision() uint {
         return uint(x.prec)
  }
@@ -170,9 +183,17 @@ func (x *Float) Mode() RoundingMode {
         return x.mode
  }
  
+// IsInf reports whether x is an infinity, according to sign.
+// If sign > 0, IsInf reports whether x is positive infinity.
+// If sign < 0, IsInf reports whether x is negative infinity.
+// If sign == 0, IsInf reports whether x is either infinity.
+func (x *Float) IsInf(sign int) bool {
+       return x.exp == infExp && (sign == 0 || x.neg == (sign < 0))
+}
+
  // debugging support
  func (x *Float) validate() {
-       // assumes x != 0
+       // assumes x != 0 && x != Inf
         const msb = 1 << (_W - 1)
         m := len(x.mant)
         if x.mant[m-1]&msb == 0 {
@@ -196,6 +217,9 @@ func (z *Float) round(sbit uint) {
                 return
         }
  
+       // handle Inf
+       // TODO(gri) handle Inf
+
         if debugFloat {
                 z.validate()
         }
@@ -399,10 +423,15 @@ func (z *Float) SetInt64(x int64) *Float {
  
  // SetFloat64 sets z to x and returns z.
  // Precision is set to 53 bits.
-// TODO(gri) test denormals, +/-Inf, disallow NaN.
+// TODO(gri) test denormals, disallow NaN.
  func (z *Float) SetFloat64(x float64) *Float {
-       z.prec = 53
         z.neg = math.Signbit(x) // handle -0 correctly (-0 == 0)
+       z.prec = 53
+       if math.IsInf(x, 0) {
+               z.mant = z.mant[:0]
+               z.exp = infExp
+               return z
+       }
         if x == 0 {
                 z.mant = z.mant[:0]
                 z.exp = 0
@@ -484,7 +513,7 @@ func high64(x nat) uint64 {
         return v
  }
  
-// TODO(gri) FIX THIS (rounding mode, errors, accuracy, etc.)
+// TODO(gri) FIX THIS (Inf, rounding mode, errors, accuracy, etc.)
  func (x *Float) Uint64() uint64 {
         m := high64(x.mant)
         s := x.exp
@@ -494,7 +523,7 @@ func (x *Float) Uint64() uint64 {
         return 0 // imprecise
  }
  
-// TODO(gri) FIX THIS (rounding mode, errors, etc.)
+// TODO(gri) FIX THIS (inf, rounding mode, errors, etc.)
  func (x *Float) Int64() int64 {
         v := int64(x.Uint64())
         if x.neg {
@@ -507,6 +536,15 @@ func (x *Float) Int64() int64 {
  // by rounding to nearest with 53 bits precision.
  // TODO(gri) implement/document error scenarios.
  func (x *Float) Float64() (float64, Accuracy) {
+       // x == +/-Inf
+       if x.exp == infExp {
+               var sign int
+               if x.neg {
+                       sign = -1
+               }
+               return math.Inf(sign), Exact
+       }
+       // x == 0
         if len(x.mant) == 0 {
                 return 0, Exact
         }
@@ -561,7 +599,7 @@ func (z *Float) Neg(x *Float) *Float {
  }
  
  // z = x + y, ignoring signs of x and y.
-// x and y must not be 0.
+// x and y must not be 0 or an Inf.
  func (z *Float) uadd(x, y *Float) {
         // Note: This implementation requires 2 shifts most of the
         // time. It is also inefficient if exponents or precisions
@@ -603,7 +641,7 @@ func (z *Float) uadd(x, y *Float) {
  }
  
  // z = x - y for x >= y, ignoring signs of x and y.
-// x and y must not be zero.
+// x and y must not be 0 or an Inf.
  func (z *Float) usub(x, y *Float) {
         // This code is symmetric to uadd.
         // We have not factored the common code out because
@@ -643,7 +681,7 @@ func (z *Float) usub(x, y *Float) {
  }
  
  // z = x * y, ignoring signs of x and y.
-// x and y must not be zero.
+// x and y must not be 0 or an Inf.
  func (z *Float) umul(x, y *Float) {
         if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
                 panic("umul called with 0 argument")
@@ -664,7 +702,7 @@ func (z *Float) umul(x, y *Float) {
  }
  
  // z = x / y, ignoring signs of x and y.
-// x and y must not be zero.
+// x and y must not be 0 or an Inf.
  func (z *Float) uquo(x, y *Float) {
         if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
                 panic("uquo called with 0 argument")
@@ -708,7 +746,7 @@ func (z *Float) uquo(x, y *Float) {
  }
  
  // ucmp returns -1, 0, or 1, depending on whether x < y, x == y, or x > y,
-// while ignoring the signs of x and y. x and y must not be zero.
+// while ignoring the signs of x and y. x and y must not be 0 or an Inf.
  func (x *Float) ucmp(y *Float) int {
         if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
                 panic("ucmp called with 0 argument")
@@ -765,16 +803,24 @@ func (x *Float) ucmp(y *Float) int {
  // sign as x even when x is zero.
  
  // Add sets z to the rounded sum x+y and returns z.
+// If z's precision is 0, it is set to the larger of
+// x's or y's precision before the operation.
  // Rounding is performed according to z's precision
  // and rounding mode; and z's accuracy reports the
  // result error relative to the exact (not rounded)
  // result.
  func (z *Float) Add(x, y *Float) *Float {
+       if z.prec == 0 {
+               z.prec = umax(x.prec, y.prec)
+       }
+
         // TODO(gri) what about -0?
         if len(y.mant) == 0 {
+               // TODO(gri) handle Inf
                 return z.Round(x, z.prec, z.mode)
         }
         if len(x.mant) == 0 {
+               // TODO(gri) handle Inf
                 return z.Round(y, z.prec, z.mode)
         }
  
@@ -799,13 +845,15 @@ func (z *Float) Add(x, y *Float) *Float {
  }
  
  // Sub sets z to the rounded difference x-y and returns z.
-// Rounding is performed according to z's precision
-// and rounding mode; and z's accuracy reports the
-// result error relative to the exact (not rounded)
-// result.
+// Precision, rounding, and accuracy reporting are as for Add.
  func (z *Float) Sub(x, y *Float) *Float {
+       if z.prec == 0 {
+               z.prec = umax(x.prec, y.prec)
+       }
+
         // TODO(gri) what about -0?
         if len(y.mant) == 0 {
+               // TODO(gri) handle Inf
                 return z.Round(x, z.prec, z.mode)
         }
         if len(x.mant) == 0 {
@@ -836,11 +884,14 @@ func (z *Float) Sub(x, y *Float) *Float {
  }
  
  // Mul sets z to the rounded product x*y and returns z.
-// Rounding is performed according to z's precision
-// and rounding mode; and z's accuracy reports the
-// result error relative to the exact (not rounded)
-// result.
+// Precision, rounding, and accuracy reporting are as for Add.
  func (z *Float) Mul(x, y *Float) *Float {
+       if z.prec == 0 {
+               z.prec = umax(x.prec, y.prec)
+       }
+
+       // TODO(gri) handle Inf
+
         // TODO(gri) what about -0?
         if len(x.mant) == 0 || len(y.mant) == 0 {
                 z.neg = false
@@ -858,46 +909,61 @@ func (z *Float) Mul(x, y *Float) *Float {
  
  // Quo sets z to the rounded quotient x/y and returns z.
  // If y == 0, a division-by-zero run-time panic occurs. TODO(gri) this should become Inf
-// Rounding is performed according to z's precision
-// and rounding mode; and z's accuracy reports the
-// result error relative to the exact (not rounded)
-// result.
+// Precision, rounding, and accuracy reporting are as for Add.
  func (z *Float) Quo(x, y *Float) *Float {
-       // TODO(gri) what about -0?
+       if z.prec == 0 {
+               z.prec = umax(x.prec, y.prec)
+       }
+
+       // TODO(gri) handle Inf
+
+       // TODO(gri) check that this is correct
+       z.neg = x.neg != y.neg
+
+       if len(y.mant) == 0 {
+               z.setExp(infExp)
+               return z
+       }
+
         if len(x.mant) == 0 {
-               z.neg = false
                 z.mant = z.mant[:0]
                 z.exp = 0
                 z.acc = Exact
                 return z
         }
-       if len(y.mant) == 0 {
-               panic("division-by-zero") // TODO(gri) handle this better
-       }
  
         // x, y != 0
         z.uquo(x, y)
-       z.neg = x.neg != y.neg
         return z
  }
  
  // Lsh sets z to the rounded x * (1<<s) and returns z.
+// If z's precision is 0, it is set to x's precision.
  // Rounding is performed according to z's precision
  // and rounding mode; and z's accuracy reports the
  // result error relative to the exact (not rounded)
  // result.
  func (z *Float) Lsh(x *Float, s uint, mode RoundingMode) *Float {
+       if z.prec == 0 {
+               z.prec = x.prec
+       }
+
+       // TODO(gri) handle Inf
+
         z.Round(x, z.prec, mode)
         z.setExp(int64(z.exp) + int64(s))
         return z
  }
  
  // Rsh sets z to the rounded x / (1<<s) and returns z.
-// Rounding is performed according to z's precision
-// and rounding mode; and z's accuracy reports the
-// result error relative to the exact (not rounded)
-// result.
+// Precision, rounding, and accuracy reporting are as for Lsh.
  func (z *Float) Rsh(x *Float, s uint, mode RoundingMode) *Float {
+       if z.prec == 0 {
+               z.prec = x.prec
+       }
+
+       // TODO(gri) handle Inf
+
         z.Round(x, z.prec, mode)
         z.setExp(int64(z.exp) - int64(s))
         return z
@@ -910,6 +976,8 @@ func (z *Float) Rsh(x *Float, s uint, mode RoundingMode) *Float {
  //   +1 if x >  y
  //
  func (x *Float) Cmp(y *Float) int {
+       // TODO(gri) handle Inf
+
         // special cases
         switch {
         case len(x.mant) == 0:
@@ -943,7 +1011,7 @@ func (x *Float) Cmp(y *Float) int {
  // Sign returns:
  //
  //     -1 if x <  0
-//      0 if x == 0 (incl. x == -0)
+//      0 if x == 0 (incl. x == -0) // TODO(gri) is this correct?
  //     +1 if x >  0
  //
  func (x *Float) Sign() int {
@@ -955,3 +1023,10 @@ func (x *Float) Sign() int {
         }
         return 1
  }
+
+func umax(x, y uint) uint {
+       if x < y {
+               return x
+       }
+       return y
+}
diff --git a/src/math/big/float_test.go b/src/math/big/float_test.go

index e37d2ed365af71738cc7ebcd3c336c742e66ef73..979b739b08cc782cf7470540f5252d35a7c69845 100644 (file)
--- a/src/math/big/float_test.go
+++ b/src/math/big/float_test.go
@@ -6,11 +6,70 @@ package big
  
  import (
         "fmt"
+       "math"
         "sort"
         "strconv"
         "testing"
  )
  
+func TestFloatZeroValue(t *testing.T) {
+       // zero (uninitialized) value is a ready-to-use 0.0
+       var x Float
+       if s := x.Format('f', 1); s != "0.0" {
+               t.Errorf("zero value = %s; want 0.0", s)
+       }
+
+       // zero value has precision 0
+       if prec := x.Precision(); prec != 0 {
+               t.Errorf("prec = %d; want 0", prec)
+       }
+
+       // zero value can be used in any and all positions of binary operations
+       make := func(x int) *Float {
+               if x == 0 {
+                       return new(Float) // 0 translates into the zero value
+               }
+               return NewFloat(float64(x), 10, 0)
+       }
+       for _, test := range []struct {
+               z, x, y, want int
+               opname        rune
+               op            func(z, x, y *Float) *Float
+       }{
+               {0, 0, 0, 0, '+', (*Float).Add},
+               {0, 1, 2, 3, '+', (*Float).Add},
+               {1, 2, 0, 2, '+', (*Float).Add},
+               {2, 0, 1, 1, '+', (*Float).Add},
+
+               {0, 0, 0, 0, '-', (*Float).Sub},
+               {0, 1, 2, -1, '-', (*Float).Sub},
+               {1, 2, 0, 2, '-', (*Float).Sub},
+               {2, 0, 1, -1, '-', (*Float).Sub},
+
+               {0, 0, 0, 0, '*', (*Float).Mul},
+               {0, 1, 2, 2, '*', (*Float).Mul},
+               {1, 2, 0, 0, '*', (*Float).Mul},
+               {2, 0, 1, 0, '*', (*Float).Mul},
+
+               {0, 0, 0, 0, '/', (*Float).Quo},
+               {0, 2, 1, 2, '/', (*Float).Quo},
+               {1, 2, 0, 0, '/', (*Float).Quo},
+               {2, 0, 1, 0, '/', (*Float).Quo},
+       } {
+               z := make(test.z)
+               test.op(z, make(test.x), make(test.y))
+               if got := int(z.Int64()); got != test.want {
+                       t.Errorf("%d %c %d = %d; want %d", test.x, test.opname, test.y, got, test.want)
+               }
+       }
+
+       // TODO(gri) test how precision is set for zero value results
+}
+
+func TestFloatInf(t *testing.T) {
+       // TODO(gri) implement this
+}
+
  func fromBinary(s string) int64 {
         x, err := strconv.ParseInt(s, 2, 64)
         if err != nil {
@@ -244,6 +303,9 @@ func TestFloatSetFloat64(t *testing.T) {
                 3.14159265e10,
                 2.718281828e-123,
                 1.0 / 3,
+               math.Inf(-1),
+               math.Inf(0),
+               -math.Inf(1),
         } {
                 for i := range [2]int{} {
                         if i&1 != 0 {
diff --git a/src/math/big/floatconv.go b/src/math/big/floatconv.go

index 7628e77d9a7b1db3e150680ff5b41912579c24a3..e3611b234baf1dba2cdf13dd86dad761b1869546 100644 (file)
--- a/src/math/big/floatconv.go
+++ b/src/math/big/floatconv.go
@@ -191,13 +191,26 @@ func (x *Float) Format(format byte, prec int) string {
  // Append appends the string form of the floating-point number x,
  // as generated by x.Format, to buf and returns the extended buffer.
  func (x *Float) Append(buf []byte, format byte, prec int) []byte {
-       // pick off simple cases
+       // TODO(gri) factor out handling of sign?
+
+       // Inf
+       if x.IsInf(0) {
+               var ch byte = '+'
+               if x.neg {
+                       ch = '-'
+               }
+               buf = append(buf, ch)
+               return append(buf, "Inf"...)
+       }
+
+       // easy formats
         switch format {
         case 'b':
                 return x.bstring(buf)
         case 'p':
                 return x.pstring(buf)
         }
+
         return x.bigFtoa(buf, format, prec)
  }
  
@@ -212,7 +225,6 @@ func (x *Float) String() string {
  // The mantissa is normalized such that is uses x.Precision() bits in binary
  // representation.
  func (x *Float) bstring(buf []byte) []byte {
-       // TODO(gri) handle Inf
         if x.neg {
                 buf = append(buf, '-')
         }
@@ -240,7 +252,6 @@ func (x *Float) bstring(buf []byte) []byte {
  // ad returns the extended buffer.
  // The mantissa is normalized such that 0.5 <= 0.mantissa < 1.0.
  func (x *Float) pstring(buf []byte) []byte {
-       // TODO(gri) handle Inf
         if x.neg {
                 buf = append(buf, '-')
         }
diff --git a/src/math/big/floatconv_test.go b/src/math/big/floatconv_test.go

index d3290dddd0acaceaea376ec371a180ccc9df8954..11e5df448a6be97f008f96d2a663d6538383c6c4 100644 (file)
--- a/src/math/big/floatconv_test.go
+++ b/src/math/big/floatconv_test.go
@@ -192,9 +192,9 @@ func TestFloat64Format(t *testing.T) {
  
                 // {math.NaN(), 'g', -1, "NaN"},
                 // {-math.NaN(), 'g', -1, "NaN"},
-               // {math.Inf(0), 'g', -1, "+Inf"},
-               // {math.Inf(-1), 'g', -1, "-Inf"},
-               // {-math.Inf(0), 'g', -1, "-Inf"},
+               {math.Inf(0), 'g', -1, "+Inf"},
+               {math.Inf(-1), 'g', -1, "-Inf"},
+               {-math.Inf(0), 'g', -1, "-Inf"},
  
                 {-1, 'b', -1, "-4503599627370496p-52"},
author	Robert Griesemer <gri@golang.org>
	Fri, 6 Feb 2015 01:21:48 +0000 (17:21 -0800)
committer	Robert Griesemer <gri@golang.org>
	Fri, 6 Feb 2015 17:21:01 +0000 (17:21 +0000)
src/math/big/float.go		patch \| blob \| history
src/math/big/float_test.go		patch \| blob \| history
src/math/big/floatconv.go		patch \| blob \| history
src/math/big/floatconv_test.go		patch \| blob \| history