From 961195ae6995a72c6f329e6397d93d62ed09135e Mon Sep 17 00:00:00 2001 From: Alan Donovan Date: Mon, 28 Jan 2013 18:00:15 -0500 Subject: [PATCH] math/big: add Rat.{,Set}Float64 methods for IEEE 754 conversions. Added tests, using input data from strconv.ParseFloat. Thanks to rsc for most of the test code. math/big could use some good package-level documentation. R=remyoudompheng, rsc CC=golang-dev https://golang.org/cl/6930059 --- src/pkg/math/big/rat.go | 151 +++++++++++++ src/pkg/math/big/rat_test.go | 404 +++++++++++++++++++++++++++++++++++ 2 files changed, 555 insertions(+) diff --git a/src/pkg/math/big/rat.go b/src/pkg/math/big/rat.go index eccf34e482..3e6473d922 100644 --- a/src/pkg/math/big/rat.go +++ b/src/pkg/math/big/rat.go @@ -10,6 +10,7 @@ import ( "encoding/binary" "errors" "fmt" + "math" "strings" ) @@ -27,6 +28,156 @@ func NewRat(a, b int64) *Rat { return new(Rat).SetFrac64(a, b) } +// SetFloat64 sets z to exactly f and returns z. +// If f is not finite, SetFloat returns nil. +func (z *Rat) SetFloat64(f float64) *Rat { + const expMask = 1<<11 - 1 + bits := math.Float64bits(f) + mantissa := bits & (1<<52 - 1) + exp := int((bits >> 52) & expMask) + switch exp { + case expMask: // non-finite + return nil + case 0: // denormal + exp -= 1022 + default: // normal + mantissa |= 1 << 52 + exp -= 1023 + } + + shift := 52 - exp + + // Optimisation (?): partially pre-normalise. + for mantissa&1 == 0 && shift > 0 { + mantissa >>= 1 + shift-- + } + + z.a.SetUint64(mantissa) + z.a.neg = f < 0 + z.b.Set(intOne) + if shift > 0 { + z.b.Lsh(&z.b, uint(shift)) + } else { + z.a.Lsh(&z.a, uint(-shift)) + } + return z.norm() +} + +// isFinite reports whether f represents a finite rational value. +// It is equivalent to !math.IsNan(f) && !math.IsInf(f, 0). +func isFinite(f float64) bool { + return math.Abs(f) <= math.MaxFloat64 +} + +// low64 returns the least significant 64 bits of natural number z. +func low64(z nat) uint64 { + if len(z) == 0 { + return 0 + } + if _W == 32 && len(z) > 1 { + return uint64(z[1])<<32 | uint64(z[0]) + } + return uint64(z[0]) +} + +// quotToFloat returns the non-negative IEEE 754 double-precision +// value nearest to the quotient a/b, using round-to-even in halfway +// cases. It does not mutate its arguments. +// Preconditions: b is non-zero; a and b have no common factors. +func quotToFloat(a, b nat) (f float64, exact bool) { + // TODO(adonovan): specialize common degenerate cases: 1.0, integers. + alen := a.bitLen() + if alen == 0 { + return 0, true + } + blen := b.bitLen() + if blen == 0 { + panic("division by zero") + } + + // 1. Left-shift A or B such that quotient A/B is in [1<<53, 1<<55). + // (54 bits if A=B.) + // This is 2 or 3 more than the float64 mantissa field width of 52: + // - the optional extra bit is shifted away in step 3 below. + // - the high-order 1 is omitted in float64 "normal" representation; + // - the low-order 1 will be used during rounding then discarded. + exp := alen - blen + var a2, b2 nat + a2 = a2.set(a) + b2 = b2.set(b) + if shift := 54 - exp; shift > 0 { + a2 = a2.shl(a2, uint(shift)) + } else if shift < 0 { + b2 = b2.shl(b2, uint(-shift)) + } + + // 2. Compute quotient and remainder (q, r). NB: due to the + // extra shift, the low-order bit of q is logically the + // high-order bit of r. + var q nat + q, r := q.div(a2, a2, b2) // (recycle a2) + mantissa := low64(q) + haveRem := len(r) > 0 // mantissa&1 && !haveRem => remainder is exactly half + + // 3. If quotient didn't fit in 54 bits, re-do division by b2<<1 + // (in effect---we accomplish this incrementally). + if mantissa>>54 == 1 { + if mantissa&1 == 1 { + haveRem = true + } + mantissa >>= 1 + exp++ + } + if mantissa>>53 != 1 { + panic("expected exactly 54 bits of result") + } + + // 4. Rounding. + if -1022-52 <= exp && exp <= -1022 { + // Denormal case; lose 'shift' bits of precision. + shift := uint64(-1022 - (exp - 1)) // [1..53) + lostbits := mantissa & (1<>= shift + exp = -1023 + 2 + } + // Round q using round-half-to-even. + exact = !haveRem + if mantissa&1 != 0 { + exact = false + if haveRem || mantissa&2 != 0 { + if mantissa++; mantissa >= 1<<54 { + // Complete rollover 11...1 => 100...0, so shift is safe + mantissa >>= 1 + exp++ + } + } + } + mantissa >>= 1 // discard rounding bit. Mantissa now scaled by 2^53. + + f = math.Ldexp(float64(mantissa), exp-53) + if math.IsInf(f, 0) { + exact = false + } + return +} + +// Float64 returns the nearest float64 value to z. +// If z is exactly representable as a float64, Float64 returns exact=true. +// If z is negative, so too is f, even if f==0. +func (z *Rat) Float64() (f float64, exact bool) { + b := z.b.abs + if len(b) == 0 { + b = b.set(natOne) // materialize denominator + } + f, exact = quotToFloat(z.a.abs, b) + if z.a.neg { + f = -f + } + return +} + // SetFrac sets z to a/b and returns z. func (z *Rat) SetFrac(a, b *Int) *Rat { z.a.neg = a.neg != b.neg diff --git a/src/pkg/math/big/rat_test.go b/src/pkg/math/big/rat_test.go index 7c634233ff..4b4134b410 100644 --- a/src/pkg/math/big/rat_test.go +++ b/src/pkg/math/big/rat_test.go @@ -8,6 +8,9 @@ import ( "bytes" "encoding/gob" "fmt" + "math" + "strconv" + "strings" "testing" ) @@ -496,3 +499,404 @@ func TestIssue3521(t *testing.T) { t.Errorf("3) got %s want %s", x, q53) } } + +// Test inputs to Rat.SetString. The optional prefix "slow:" skips +// checks found to be slow for certain large rationals. +var float64inputs = []string{ + // + // Constants plundered from strconv/testfp.txt. + // + + // Table 1: Stress Inputs for Conversion to 53-bit Binary, < 1/2 ULP + "5e+125", + "69e+267", + "999e-026", + "7861e-034", + "75569e-254", + "928609e-261", + "9210917e+080", + "84863171e+114", + "653777767e+273", + "5232604057e-298", + "27235667517e-109", + "653532977297e-123", + "3142213164987e-294", + "46202199371337e-072", + "231010996856685e-073", + "9324754620109615e+212", + "78459735791271921e+049", + "272104041512242479e+200", + "6802601037806061975e+198", + "20505426358836677347e-221", + "836168422905420598437e-234", + "4891559871276714924261e+222", + + // Table 2: Stress Inputs for Conversion to 53-bit Binary, > 1/2 ULP + "9e-265", + "85e-037", + "623e+100", + "3571e+263", + "81661e+153", + "920657e-023", + "4603285e-024", + "87575437e-309", + "245540327e+122", + "6138508175e+120", + "83356057653e+193", + "619534293513e+124", + "2335141086879e+218", + "36167929443327e-159", + "609610927149051e-255", + "3743626360493413e-165", + "94080055902682397e-242", + "899810892172646163e+283", + "7120190517612959703e+120", + "25188282901709339043e-252", + "308984926168550152811e-052", + "6372891218502368041059e+064", + + // Table 14: Stress Inputs for Conversion to 24-bit Binary, <1/2 ULP + "5e-20", + "67e+14", + "985e+15", + "7693e-42", + "55895e-16", + "996622e-44", + "7038531e-32", + "60419369e-46", + "702990899e-20", + "6930161142e-48", + "25933168707e+13", + "596428896559e+20", + + // Table 15: Stress Inputs for Conversion to 24-bit Binary, >1/2 ULP + "3e-23", + "57e+18", + "789e-35", + "2539e-18", + "76173e+28", + "887745e-11", + "5382571e-37", + "82381273e-35", + "750486563e-38", + "3752432815e-39", + "75224575729e-45", + "459926601011e+15", + + // + // Constants plundered from strconv/atof_test.go. + // + + "0", + "1", + "+1", + "1e23", + "1E23", + "100000000000000000000000", + "1e-100", + "123456700", + "99999999999999974834176", + "100000000000000000000001", + "100000000000000008388608", + "100000000000000016777215", + "100000000000000016777216", + "-1", + "-0.1", + "-0", // NB: exception made for this input + "1e-20", + "625e-3", + + // largest float64 + "1.7976931348623157e308", + "-1.7976931348623157e308", + // next float64 - too large + "1.7976931348623159e308", + "-1.7976931348623159e308", + // the border is ...158079 + // borderline - okay + "1.7976931348623158e308", + "-1.7976931348623158e308", + // borderline - too large + "1.797693134862315808e308", + "-1.797693134862315808e308", + + // a little too large + "1e308", + "2e308", + "1e309", + + // way too large + "1e310", + "-1e310", + "1e400", + "-1e400", + "1e400000", + "-1e400000", + + // denormalized + "1e-305", + "1e-306", + "1e-307", + "1e-308", + "1e-309", + "1e-310", + "1e-322", + // smallest denormal + "5e-324", + "4e-324", + "3e-324", + // too small + "2e-324", + // way too small + "1e-350", + "slow:1e-400000", + // way too small, negative + "-1e-350", + "slow:-1e-400000", + + // try to overflow exponent + // [Disabled: too slow and memory-hungry with rationals.] + // "1e-4294967296", + // "1e+4294967296", + // "1e-18446744073709551616", + // "1e+18446744073709551616", + + // http://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/ + "2.2250738585072012e-308", + // http://www.exploringbinary.com/php-hangs-on-numeric-value-2-2250738585072011e-308/ + + "2.2250738585072011e-308", + + // A very large number (initially wrongly parsed by the fast algorithm). + "4.630813248087435e+307", + + // A different kind of very large number. + "22.222222222222222", + "2." + strings.Repeat("2", 4000) + "e+1", + + // Exactly halfway between 1 and math.Nextafter(1, 2). + // Round to even (down). + "1.00000000000000011102230246251565404236316680908203125", + // Slightly lower; still round down. + "1.00000000000000011102230246251565404236316680908203124", + // Slightly higher; round up. + "1.00000000000000011102230246251565404236316680908203126", + // Slightly higher, but you have to read all the way to the end. + "slow:1.00000000000000011102230246251565404236316680908203125" + strings.Repeat("0", 10000) + "1", + + // Smallest denormal, 2^(-1022-52) + "4.940656458412465441765687928682213723651e-324", + // Half of smallest denormal, 2^(-1022-53) + "2.470328229206232720882843964341106861825e-324", + // A little more than the exact half of smallest denormal + // 2^-1075 + 2^-1100. (Rounds to 1p-1074.) + "2.470328302827751011111470718709768633275e-324", + // The exact halfway between smallest normal and largest denormal: + // 2^-1022 - 2^-1075. (Rounds to 2^-1022.) + "2.225073858507201136057409796709131975935e-308", + + "1152921504606846975", // 1<<60 - 1 + "-1152921504606846975", // -(1<<60 - 1) + "1152921504606846977", // 1<<60 + 1 + "-1152921504606846977", // -(1<<60 + 1) + + "1/3", +} + +func TestFloat64SpecialCases(t *testing.T) { + for _, input := range float64inputs { + slow := strings.HasPrefix(input, "slow:") + if slow { + input = input[len("slow:"):] + } + + r, ok := new(Rat).SetString(input) + if !ok { + t.Errorf("Rat.SetString(%q) failed", input) + continue + } + f, exact := r.Float64() + + // 1. Check string -> Rat -> float64 conversions are + // consistent with strconv.ParseFloat. + // Skip this check if the input uses "a/b" rational syntax. + if !strings.Contains(input, "/") { + e, _ := strconv.ParseFloat(input, 64) + + // Careful: negative Rats too small for + // float64 become -0, but Rat obviously cannot + // preserve the sign from SetString("-0"). + switch { + case math.Float64bits(e) == math.Float64bits(f): + // Ok: bitwise equal. + case f == 0 && r.Num().BitLen() == 0: + // Ok: Rat(0) is equivalent to both +/- float64(0). + default: + t.Errorf("strconv.ParseFloat(%q) = %g (%b), want %g (%b); delta=%g", input, e, e, f, f, f-e) + } + } + + if !isFinite(f) || slow { + continue + } + + // 2. Check f is best approximation to r. + if !checkIsBestApprox(t, f, r) { + // Append context information. + t.Errorf("(input was %q)", input) + } + + // 3. Check f->R->f roundtrip is non-lossy. + checkNonLossyRoundtrip(t, f) + + // 4. Check exactness using slow algorithm. + if wasExact := new(Rat).SetFloat64(f).Cmp(r) == 0; wasExact != exact { + t.Errorf("Rat.SetString(%q).Float64().exact = %b, want %b", input, exact, wasExact) + } + } +} + +func TestFloat64Distribution(t *testing.T) { + // Generate a distribution of (sign, mantissa, exp) values + // broader than the float64 range, and check Rat.Float64() + // always picks the closest float64 approximation. + var add = []int64{ + 0, + 1, + 3, + 5, + 7, + 9, + 11, + } + const winc, einc = 5, 100 // quick test (<1s) + //const winc, einc = 1, 1 // soak test (~75s) + for _, sign := range "+-" { + for _, a := range add { + for wid := uint64(0); wid < 60; wid += winc { + b := int64(1< 0 { + num.Lsh(num, uint(exp)) + } else { + den.Lsh(den, uint(-exp)) + } + r := new(Rat).SetFrac(num, den) + f, _ := r.Float64() + + if !checkIsBestApprox(t, f, r) { + // Append context information. + t.Errorf("(input was mantissa %#x, exp %d; f=%g (%b); f~%g; r=%v)", + b, exp, f, f, math.Ldexp(float64(b), exp), r) + } + + checkNonLossyRoundtrip(t, f) + } + } + } + } +} + +// TestFloat64NonFinite checks that SetFloat64 of a non-finite value +// returns nil. +func TestSetFloat64NonFinite(t *testing.T) { + for _, f := range []float64{math.NaN(), math.Inf(+1), math.Inf(-1)} { + var r Rat + if r2 := r.SetFloat64(f); r2 != nil { + t.Errorf("SetFloat64(%g) was %v, want nil", f, r2) + } + } +} + +// checkNonLossyRoundtrip checks that a float->Rat->float roundtrip is +// non-lossy for finite f. +func checkNonLossyRoundtrip(t *testing.T, f float64) { + if !isFinite(f) { + return + } + r := new(Rat).SetFloat64(f) + if r == nil { + t.Errorf("Rat.SetFloat64(%g (%b)) == nil", f, f) + return + } + f2, exact := r.Float64() + if f != f2 || !exact { + t.Errorf("Rat.SetFloat64(%g).Float64() = %g (%b), %v, want %g (%b), %v; delta=%b", + f, f2, f2, exact, f, f, true, f2-f) + } +} + +// delta returns the absolute difference between r and f. +func delta(r *Rat, f float64) *Rat { + d := new(Rat).Sub(r, new(Rat).SetFloat64(f)) + return d.Abs(d) +} + +// checkIsBestApprox checks that f is the best possible float64 +// approximation of r. +// Returns true on success. +func checkIsBestApprox(t *testing.T, f float64, r *Rat) bool { + if math.Abs(f) >= math.MaxFloat64 { + // Cannot check +Inf, -Inf, nor the float next to them (MaxFloat64). + // But we have tests for these special cases. + return true + } + + // r must be strictly between f0 and f1, the floats bracketing f. + f0 := math.Nextafter(f, math.Inf(-1)) + f1 := math.Nextafter(f, math.Inf(+1)) + + // For f to be correct, r must be closer to f than to f0 or f1. + df := delta(r, f) + df0 := delta(r, f0) + df1 := delta(r, f1) + if df.Cmp(df0) > 0 { + t.Errorf("Rat(%v).Float64() = %g (%b), but previous float64 %g (%b) is closer", r, f, f, f0, f0) + return false + } + if df.Cmp(df1) > 0 { + t.Errorf("Rat(%v).Float64() = %g (%b), but next float64 %g (%b) is closer", r, f, f, f1, f1) + return false + } + if df.Cmp(df0) == 0 && !isEven(f) { + t.Errorf("Rat(%v).Float64() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, f0, f0) + return false + } + if df.Cmp(df1) == 0 && !isEven(f) { + t.Errorf("Rat(%v).Float64() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, f1, f1) + return false + } + return true +} + +func isEven(f float64) bool { return math.Float64bits(f)&1 == 0 } + +func TestIsFinite(t *testing.T) { + finites := []float64{ + 1.0 / 3, + 4891559871276714924261e+222, + math.MaxFloat64, + math.SmallestNonzeroFloat64, + -math.MaxFloat64, + -math.SmallestNonzeroFloat64, + } + for _, f := range finites { + if !isFinite(f) { + t.Errorf("!IsFinite(%g (%b))", f, f) + } + } + nonfinites := []float64{ + math.NaN(), + math.Inf(-1), + math.Inf(+1), + } + for _, f := range nonfinites { + if isFinite(f) { + t.Errorf("IsFinite(%g, (%b))", f, f) + } + } +} -- 2.48.1