From: Robert Griesemer Date: Wed, 11 Jun 2014 16:10:49 +0000 (-0700) Subject: math/big: implement Rat.Float32 X-Git-Tag: go1.4beta1~1319 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=be91bc29a43ae582b6ca7f6adf561cfb25bd6911;p=gostls13.git math/big: implement Rat.Float32 Pending CL 101750048. For submission after the 1.3 release. Fixes #8065. LGTM=adonovan R=adonovan CC=golang-codereviews https://golang.org/cl/93550043 --- diff --git a/src/pkg/math/big/int.go b/src/pkg/math/big/int.go index 269949d616..e70d0489be 100644 --- a/src/pkg/math/big/int.go +++ b/src/pkg/math/big/int.go @@ -510,10 +510,30 @@ func (z *Int) Scan(s fmt.ScanState, ch rune) error { return err } +// low32 returns the least significant 32 bits of z. +func low32(z nat) uint32 { + if len(z) == 0 { + return 0 + } + return uint32(z[0]) +} + +// low64 returns the least significant 64 bits of z. +func low64(z nat) uint64 { + if len(z) == 0 { + return 0 + } + v := uint64(z[0]) + if _W == 32 && len(z) > 1 { + v |= uint64(z[1]) << 32 + } + return v +} + // Int64 returns the int64 representation of x. // If x cannot be represented in an int64, the result is undefined. func (x *Int) Int64() int64 { - v := int64(x.Uint64()) + v := int64(low64(x.abs)) if x.neg { v = -v } @@ -523,14 +543,7 @@ func (x *Int) Int64() int64 { // Uint64 returns the uint64 representation of x. // If x cannot be represented in a uint64, the result is undefined. func (x *Int) Uint64() uint64 { - if len(x.abs) == 0 { - return 0 - } - v := uint64(x.abs[0]) - if _W == 32 && len(x.abs) > 1 { - v |= uint64(x.abs[1]) << 32 - } - return v + return low64(x.abs) } // SetString sets z to the value of s, interpreted in the given base, diff --git a/src/pkg/math/big/rat.go b/src/pkg/math/big/rat.go index f0973b3902..e6ab0bb483 100644 --- a/src/pkg/math/big/rat.go +++ b/src/pkg/math/big/rat.go @@ -64,28 +64,125 @@ func (z *Rat) SetFloat64(f float64) *Rat { return z.norm() } -// isFinite reports whether f represents a finite rational value. -// It is equivalent to !math.IsNan(f) && !math.IsInf(f, 0). -func isFinite(f float64) bool { - return math.Abs(f) <= math.MaxFloat64 -} +// quotToFloat32 returns the non-negative float32 value +// nearest to the quotient a/b, using round-to-even in +// halfway cases. It does not mutate its arguments. +// Preconditions: b is non-zero; a and b have no common factors. +func quotToFloat32(a, b nat) (f float32, exact bool) { + const ( + // float size in bits + Fsize = 32 + + // mantissa + Msize = 23 + Msize1 = Msize + 1 // incl. implicit 1 + Msize2 = Msize1 + 1 + + // exponent + Esize = Fsize - Msize1 + Ebias = 1<<(Esize-1) - 1 + Emin = 1 - Ebias + Emax = Ebias + ) + + // TODO(adonovan): specialize common degenerate cases: 1.0, integers. + alen := a.bitLen() + if alen == 0 { + return 0, true + } + blen := b.bitLen() + if blen == 0 { + panic("division by zero") + } + + // 1. Left-shift A or B such that quotient A/B is in [1<= B). + // This is 2 or 3 more than the float32 mantissa field width of Msize: + // - the optional extra bit is shifted away in step 3 below. + // - the high-order 1 is omitted in "normal" representation; + // - the low-order 1 will be used during rounding then discarded. + exp := alen - blen + var a2, b2 nat + a2 = a2.set(a) + b2 = b2.set(b) + if shift := Msize2 - exp; shift > 0 { + a2 = a2.shl(a2, uint(shift)) + } else if shift < 0 { + b2 = b2.shl(b2, uint(-shift)) + } + + // 2. Compute quotient and remainder (q, r). NB: due to the + // extra shift, the low-order bit of q is logically the + // high-order bit of r. + var q nat + q, r := q.div(a2, a2, b2) // (recycle a2) + mantissa := low32(q) + haveRem := len(r) > 0 // mantissa&1 && !haveRem => remainder is exactly half -// low64 returns the least significant 64 bits of natural number z. -func low64(z nat) uint64 { - if len(z) == 0 { - return 0 + // 3. If quotient didn't fit in Msize2 bits, redo division by b2<<1 + // (in effect---we accomplish this incrementally). + if mantissa>>Msize2 == 1 { + if mantissa&1 == 1 { + haveRem = true + } + mantissa >>= 1 + exp++ } - if _W == 32 && len(z) > 1 { - return uint64(z[1])<<32 | uint64(z[0]) + if mantissa>>Msize1 != 1 { + panic(fmt.Sprintf("expected exactly %d bits of result", Msize2)) } - return uint64(z[0]) + + // 4. Rounding. + if Emin-Msize <= exp && exp <= Emin { + // Denormal case; lose 'shift' bits of precision. + shift := uint(Emin - (exp - 1)) // [1..Esize1) + lostbits := mantissa & (1<>= shift + exp = 2 - Ebias // == exp + shift + } + // Round q using round-half-to-even. + exact = !haveRem + if mantissa&1 != 0 { + exact = false + if haveRem || mantissa&2 != 0 { + if mantissa++; mantissa >= 1< 100...0, so shift is safe + mantissa >>= 1 + exp++ + } + } + } + mantissa >>= 1 // discard rounding bit. Mantissa now scaled by 1<=B.) - // This is 2 or 3 more than the float64 mantissa field width of 52: + // 1. Left-shift A or B such that quotient A/B is in [1<= B). + // This is 2 or 3 more than the float64 mantissa field width of Msize: // - the optional extra bit is shifted away in step 3 below. - // - the high-order 1 is omitted in float64 "normal" representation; + // - the high-order 1 is omitted in "normal" representation; // - the low-order 1 will be used during rounding then discarded. exp := alen - blen var a2, b2 nat a2 = a2.set(a) b2 = b2.set(b) - if shift := 54 - exp; shift > 0 { + if shift := Msize2 - exp; shift > 0 { a2 = a2.shl(a2, uint(shift)) } else if shift < 0 { b2 = b2.shl(b2, uint(-shift)) @@ -120,49 +217,65 @@ func quotToFloat(a, b nat) (f float64, exact bool) { mantissa := low64(q) haveRem := len(r) > 0 // mantissa&1 && !haveRem => remainder is exactly half - // 3. If quotient didn't fit in 54 bits, re-do division by b2<<1 + // 3. If quotient didn't fit in Msize2 bits, redo division by b2<<1 // (in effect---we accomplish this incrementally). - if mantissa>>54 == 1 { + if mantissa>>Msize2 == 1 { if mantissa&1 == 1 { haveRem = true } mantissa >>= 1 exp++ } - if mantissa>>53 != 1 { - panic("expected exactly 54 bits of result") + if mantissa>>Msize1 != 1 { + panic(fmt.Sprintf("expected exactly %d bits of result", Msize2)) } // 4. Rounding. - if -1022-52 <= exp && exp <= -1022 { + if Emin-Msize <= exp && exp <= Emin { // Denormal case; lose 'shift' bits of precision. - shift := uint64(-1022 - (exp - 1)) // [1..53) + shift := uint(Emin - (exp - 1)) // [1..Esize1) lostbits := mantissa & (1<>= shift - exp = -1023 + 2 + exp = 2 - Ebias // == exp + shift } // Round q using round-half-to-even. exact = !haveRem if mantissa&1 != 0 { exact = false if haveRem || mantissa&2 != 0 { - if mantissa++; mantissa >= 1<<54 { + if mantissa++; mantissa >= 1< 100...0, so shift is safe mantissa >>= 1 exp++ } } } - mantissa >>= 1 // discard rounding bit. Mantissa now scaled by 2^53. + mantissa >>= 1 // discard rounding bit. Mantissa now scaled by 1< Rat -> float32 conversions are + // consistent with strconv.ParseFloat. + // Skip this check if the input uses "a/b" rational syntax. + if !strings.Contains(input, "/") { + e64, _ := strconv.ParseFloat(input, 32) + e := float32(e64) + + // Careful: negative Rats too small for + // float64 become -0, but Rat obviously cannot + // preserve the sign from SetString("-0"). + switch { + case math.Float32bits(e) == math.Float32bits(f): + // Ok: bitwise equal. + case f == 0 && r.Num().BitLen() == 0: + // Ok: Rat(0) is equivalent to both +/- float64(0). + default: + t.Errorf("strconv.ParseFloat(%q) = %g (%b), want %g (%b); delta = %g", input, e, e, f, f, f-e) + } + } + + if !isFinite(float64(f)) { + continue + } + + // 2. Check f is best approximation to r. + if !checkIsBestApprox32(t, f, r) { + // Append context information. + t.Errorf("(input was %q)", input) + } + + // 3. Check f->R->f roundtrip is non-lossy. + checkNonLossyRoundtrip32(t, f) + + // 4. Check exactness using slow algorithm. + if wasExact := new(Rat).SetFloat64(float64(f)).Cmp(r) == 0; wasExact != exact { + t.Errorf("Rat.SetString(%q).Float32().exact = %t, want %t", input, exact, wasExact) + } + } +} + func TestFloat64SpecialCases(t *testing.T) { for _, input := range float64inputs { if strings.HasPrefix(input, "long:") { @@ -830,13 +891,13 @@ func TestFloat64SpecialCases(t *testing.T) { } // 2. Check f is best approximation to r. - if !checkIsBestApprox(t, f, r) { + if !checkIsBestApprox64(t, f, r) { // Append context information. t.Errorf("(input was %q)", input) } // 3. Check f->R->f roundtrip is non-lossy. - checkNonLossyRoundtrip(t, f) + checkNonLossyRoundtrip64(t, f) // 4. Check exactness using slow algorithm. if wasExact := new(Rat).SetFloat64(f).Cmp(r) == 0; wasExact != exact { @@ -845,6 +906,54 @@ func TestFloat64SpecialCases(t *testing.T) { } } +func TestFloat32Distribution(t *testing.T) { + // Generate a distribution of (sign, mantissa, exp) values + // broader than the float32 range, and check Rat.Float32() + // always picks the closest float32 approximation. + var add = []int64{ + 0, + 1, + 3, + 5, + 7, + 9, + 11, + } + var winc, einc = uint64(1), 1 // soak test (~1.5s on x86-64) + if testing.Short() { + winc, einc = 5, 15 // quick test (~60ms on x86-64) + } + + for _, sign := range "+-" { + for _, a := range add { + for wid := uint64(0); wid < 30; wid += winc { + b := 1< 0 { + num.Lsh(num, uint(exp)) + } else { + den.Lsh(den, uint(-exp)) + } + r := new(Rat).SetFrac(num, den) + f, _ := r.Float32() + + if !checkIsBestApprox32(t, f, r) { + // Append context information. + t.Errorf("(input was mantissa %#x, exp %d; f = %g (%b); f ~ %g; r = %v)", + b, exp, f, f, math.Ldexp(float64(b), exp), r) + } + + checkNonLossyRoundtrip32(t, f) + } + } + } + } +} + func TestFloat64Distribution(t *testing.T) { // Generate a distribution of (sign, mantissa, exp) values // broader than the float64 range, and check Rat.Float64() @@ -858,7 +967,7 @@ func TestFloat64Distribution(t *testing.T) { 9, 11, } - var winc, einc = uint64(1), int(1) // soak test (~75s on x86-64) + var winc, einc = uint64(1), 1 // soak test (~75s on x86-64) if testing.Short() { winc, einc = 10, 500 // quick test (~12ms on x86-64) } @@ -866,7 +975,7 @@ func TestFloat64Distribution(t *testing.T) { for _, sign := range "+-" { for _, a := range add { for wid := uint64(0); wid < 60; wid += winc { - b := int64(1<Rat->float roundtrip is +// checkNonLossyRoundtrip32 checks that a float->Rat->float roundtrip is // non-lossy for finite f. -func checkNonLossyRoundtrip(t *testing.T, f float64) { +func checkNonLossyRoundtrip32(t *testing.T, f float32) { + if !isFinite(float64(f)) { + return + } + r := new(Rat).SetFloat64(float64(f)) + if r == nil { + t.Errorf("Rat.SetFloat64(float64(%g) (%b)) == nil", f, f) + return + } + f2, exact := r.Float32() + if f != f2 || !exact { + t.Errorf("Rat.SetFloat64(float64(%g)).Float32() = %g (%b), %v, want %g (%b), %v; delta = %b", + f, f2, f2, exact, f, f, true, f2-f) + } +} + +// checkNonLossyRoundtrip64 checks that a float->Rat->float roundtrip is +// non-lossy for finite f. +func checkNonLossyRoundtrip64(t *testing.T, f float64) { if !isFinite(f) { return } @@ -928,10 +1055,47 @@ func delta(r *Rat, f float64) *Rat { return d.Abs(d) } -// checkIsBestApprox checks that f is the best possible float64 +// checkIsBestApprox32 checks that f is the best possible float32 +// approximation of r. +// Returns true on success. +func checkIsBestApprox32(t *testing.T, f float32, r *Rat) bool { + if math.Abs(float64(f)) >= math.MaxFloat32 { + // Cannot check +Inf, -Inf, nor the float next to them (MaxFloat32). + // But we have tests for these special cases. + return true + } + + // r must be strictly between f0 and f1, the floats bracketing f. + f0 := math.Nextafter32(f, float32(math.Inf(-1))) + f1 := math.Nextafter32(f, float32(math.Inf(+1))) + + // For f to be correct, r must be closer to f than to f0 or f1. + df := delta(r, float64(f)) + df0 := delta(r, float64(f0)) + df1 := delta(r, float64(f1)) + if df.Cmp(df0) > 0 { + t.Errorf("Rat(%v).Float32() = %g (%b), but previous float32 %g (%b) is closer", r, f, f, f0, f0) + return false + } + if df.Cmp(df1) > 0 { + t.Errorf("Rat(%v).Float32() = %g (%b), but next float32 %g (%b) is closer", r, f, f, f1, f1) + return false + } + if df.Cmp(df0) == 0 && !isEven32(f) { + t.Errorf("Rat(%v).Float32() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, f0, f0) + return false + } + if df.Cmp(df1) == 0 && !isEven32(f) { + t.Errorf("Rat(%v).Float32() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, f1, f1) + return false + } + return true +} + +// checkIsBestApprox64 checks that f is the best possible float64 // approximation of r. // Returns true on success. -func checkIsBestApprox(t *testing.T, f float64, r *Rat) bool { +func checkIsBestApprox64(t *testing.T, f float64, r *Rat) bool { if math.Abs(f) >= math.MaxFloat64 { // Cannot check +Inf, -Inf, nor the float next to them (MaxFloat64). // But we have tests for these special cases. @@ -939,8 +1103,8 @@ func checkIsBestApprox(t *testing.T, f float64, r *Rat) bool { } // r must be strictly between f0 and f1, the floats bracketing f. - f0 := math.Nextafter(f, math.Inf(-1)) - f1 := math.Nextafter(f, math.Inf(+1)) + f0 := math.Nextafter64(f, math.Inf(-1)) + f1 := math.Nextafter64(f, math.Inf(+1)) // For f to be correct, r must be closer to f than to f0 or f1. df := delta(r, f) @@ -954,18 +1118,19 @@ func checkIsBestApprox(t *testing.T, f float64, r *Rat) bool { t.Errorf("Rat(%v).Float64() = %g (%b), but next float64 %g (%b) is closer", r, f, f, f1, f1) return false } - if df.Cmp(df0) == 0 && !isEven(f) { + if df.Cmp(df0) == 0 && !isEven64(f) { t.Errorf("Rat(%v).Float64() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, f0, f0) return false } - if df.Cmp(df1) == 0 && !isEven(f) { + if df.Cmp(df1) == 0 && !isEven64(f) { t.Errorf("Rat(%v).Float64() = %g (%b); halfway should have rounded to %g (%b) instead", r, f, f, f1, f1) return false } return true } -func isEven(f float64) bool { return math.Float64bits(f)&1 == 0 } +func isEven32(f float32) bool { return math.Float32bits(f)&1 == 0 } +func isEven64(f float64) bool { return math.Float64bits(f)&1 == 0 } func TestIsFinite(t *testing.T) { finites := []float64{