From 5ffbca4823455d42ef6314c67c1cc346599d65bf Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Thu, 21 May 2015 18:00:54 -0700 Subject: [PATCH] math/big: fix Float.Float64 conversion for denormal corner cases - This change uses the same code as for Float32 and fixes the case of a number that gets rounded up to the smallest denormal. - Enabled correspoding test case. Change-Id: I8aac874a566cd727863a82717854f603fbdc26c6 Reviewed-on: https://go-review.googlesource.com/10352 Reviewed-by: Alan Donovan --- src/math/big/float.go | 74 +++++++++++++++++++------------------- src/math/big/float_test.go | 5 ++- 2 files changed, 40 insertions(+), 39 deletions(-) diff --git a/src/math/big/float.go b/src/math/big/float.go index b666b9dd38..dcb72c5754 100644 --- a/src/math/big/float.go +++ b/src/math/big/float.go @@ -959,10 +959,6 @@ func (x *Float) Float32() (float32, Accuracy) { panic("unreachable") } -// TODO(gri) Use same algorithm for Float64 as for Float32. The Float64 code -// is incorrect for some corner cases (numbers that get rounded up to smallest -// denormal - test cases are missing). - // Float64 returns the float64 value nearest to x. If x is too small to be // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result // is (0, Below) or (-0, Above), respectively, depending on the sign of x. @@ -987,64 +983,70 @@ func (x *Float) Float64() (float64, Accuracy) { emax = bias // 1023 largest unbiased exponent (normal) ) - // Float mantissae m have an explicit msb and are in the range 0.5 <= m < 1.0. - // floatxx mantissae have an implicit msb and are in the range 1.0 <= m < 2.0. - // For a given mantissa m, we need to add 1 to a floatxx exponent to get the - // corresponding Float exponent. - // (see also implementation of math.Ldexp for similar code) + // Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa. + e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0 + p := mbits + 1 // precision of normal float - if x.exp < dmin+1 { - // underflow - if x.neg { - var z float64 - return -z, Above + // If the exponent is too small, we may have a denormal number + // in which case we have fewer mantissa bits available: reduce + // precision accordingly. + if e < emin { + p -= emin - int(e) + // Make sure we have at least 1 bit so that we don't + // lose numbers rounded up to the smallest denormal. + if p < 1 { + p = 1 } - return 0.0, Below } - // x.exp >= dmin+1 + // round var r Float - r.prec = mbits + 1 // +1 for implicit msb - if x.exp < emin+1 { - // denormal number - round to fewer bits - r.prec = uint32(x.exp - dmin) - } + r.prec = uint32(p) r.Set(x) + e = r.exp - 1 // Rounding may have caused r to overflow to ±Inf // (rounding never causes underflows to 0). if r.form == inf { - r.exp = emax + 2 // cause overflow below + e = emax + 1 // cause overflow below } - if r.exp > emax+1 { + // If the exponent is too large, overflow to ±Inf. + if e > emax { // overflow if x.neg { return math.Inf(-1), Below } return math.Inf(+1), Above } - // dmin+1 <= r.exp <= emax+1 - var s uint64 - if r.neg { - s = 1 << (fbits - 1) + // Determine sign, biased exponent, and mantissa. + var sign, bexp, mant uint64 + if x.neg { + sign = 1 << (fbits - 1) } - m := high64(r.mant) >> ebits & (1<> (fbits - r.prec) + } else { + // normal number: emin <= e <= emax + bexp = uint64(e+bias) << mbits + mant = high64(r.mant) >> ebits & (1<