--- /dev/null
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This file implements multi-precision floating-point numbers.
+// Like in the GNU MPFR library (http://www.mpfr.org/), operands
+// can be of mixed precision. Unlike MPFR, the rounding mode is
+// not specified with each operation, but with each operand. The
+// rounding mode of the result operand determines the rounding
+// mode of an operation. This is a from-scratch implementation.
+
+// CAUTION: WORK IN PROGRESS - ANY ASPECT OF THIS IMPLEMENTATION MAY CHANGE!
+
+package big
+
+import (
+ "fmt"
+ "io"
+ "math"
+ "strings"
+)
+
+// TODO(gri): Determine if there's a more natural way to set the precision.
+// Should there be a special meaning for prec 0? Such as "full precision"?
+// (would be possible for all ops except quotient).
+
+const debugFloat = true // enable for debugging
+
+// Internal representation: A floating-point value x != 0 consists
+// of a sign (x.neg), mantissa (x.mant), and exponent (x.exp) such
+// that
+//
+// x = sign * 0.mantissa * 2**exponent
+//
+// and the mantissa is interpreted as a value between 0.5 and 1:
+//
+// 0.5 <= mantissa < 1.0
+//
+// The mantissa bits are stored in the shortest nat slice long enough
+// to hold x.prec mantissa bits. The mantissa is normalized such that
+// the msb of x.mant == 1. Thus, if the precision is not a multiple of
+// the Word size _W, x.mant[0] contains trailing zero bits. The number
+// 0 is represented by an empty mantissa and a zero exponent.
+
+// A Float represents a multi-precision floating point number
+// of the form
+//
+// sign * mantissa * 2**exponent
+//
+// Each value also has a precision, rounding mode, and accuracy value:
+// The precision is the number of mantissa bits used to represent a
+// value, and the result of operations is rounded to that many bits
+// according to the value's rounding mode (unless specified othewise).
+// The accuracy value indicates the rounding error with respect to the
+// exact (not rounded) value.
+//
+// The zero value for a Float represents the number 0.
+//
+// By setting the desired precision to 24 (or 53) and using ToNearestEven
+// rounding, Float arithmetic operations emulate the corresponding float32
+// or float64 IEEE-754 operations (except for denormalized numbers and NaNs).
+//
+// CAUTION: THIS IS WORK IN PROGRESS - DO NOT USE YET.
+//
+type Float struct {
+ mode RoundingMode
+ acc Accuracy
+ neg bool
+ mant nat
+ exp int32
+ prec uint // TODO(gri) make this a 32bit field
+}
+
+// NewFloat returns a new Float with value x rounded
+// to prec bits according to the given rounding mode.
+func NewFloat(x float64, prec uint, mode RoundingMode) *Float {
+ // TODO(gri) should make this more efficient
+ z := new(Float).SetFloat64(x)
+ return z.Round(z, prec, mode)
+}
+
+// infExp is the exponent value for infinity.
+const infExp = 1<<31 - 1
+
+// NewInf returns a new Float with value positive infinity (sign >= 0),
+// or negative infinity (sign < 0).
+func NewInf(sign int) *Float {
+ return &Float{neg: sign < 0, exp: infExp}
+}
+
+func (z *Float) setExp(e int64) {
+ e32 := int32(e)
+ if int64(e32) != e {
+ panic("exponent overflow") // TODO(gri) handle this gracefully
+ }
+ z.exp = e32
+}
+
+// Accuracy describes the rounding error produced by the most recent
+// operation that generated a Float value, relative to the exact value:
+//
+// -1: below exact value
+// 0: exact value
+// +1: above exact value
+//
+type Accuracy int8
+
+// Constants describing the Accuracy of a Float.
+const (
+ Below Accuracy = -1
+ Exact Accuracy = 0
+ Above Accuracy = +1
+)
+
+func (a Accuracy) String() string {
+ switch {
+ case a < 0:
+ return "below"
+ default:
+ return "exact"
+ case a > 0:
+ return "above"
+ }
+}
+
+// RoundingMode determines how a Float value is rounded to the
+// desired precision. Rounding may change the Float value; the
+// rounding error is described by the Float's Accuracy.
+type RoundingMode uint8
+
+// The following rounding modes are supported.
+const (
+ ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
+ ToNearestAway // == IEEE 754-2008 roundTiesToAway
+ ToZero // == IEEE 754-2008 roundTowardZero
+ AwayFromZero // no IEEE 754-2008 equivalent
+ ToNegativeInf // == IEEE 754-2008 roundTowardNegative
+ ToPositiveInf // == IEEE 754-2008 roundTowardPositive
+)
+
+func (mode RoundingMode) String() string {
+ switch mode {
+ case ToNearestEven:
+ return "ToNearestEven"
+ case ToNearestAway:
+ return "ToNearestAway"
+ case ToZero:
+ return "ToZero"
+ case AwayFromZero:
+ return "AwayFromZero"
+ case ToNegativeInf:
+ return "ToNegativeInf"
+ case ToPositiveInf:
+ return "ToPositiveInf"
+ }
+ panic("unreachable")
+}
+
+// Precision returns the mantissa precision of x in bits.
+// The precision may be 0 if x == 0. // TODO(gri) Determine a better approach.
+func (x *Float) Precision() uint {
+ return uint(x.prec)
+}
+
+// Accuracy returns the accuracy of x produced by the most recent operation.
+func (x *Float) Accuracy() Accuracy {
+ return x.acc
+}
+
+// Mode returns the rounding mode of x.
+func (x *Float) Mode() RoundingMode {
+ return x.mode
+}
+
+// debugging support
+func (x *Float) validate() {
+ // assumes x != 0
+ const msb = 1 << (_W - 1)
+ m := len(x.mant)
+ if x.mant[m-1]&msb == 0 {
+ panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.PString()))
+ }
+ if x.prec <= 0 {
+ panic(fmt.Sprintf("invalid precision %d", x.prec))
+ }
+}
+
+// round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
+// sbit must be 0 or 1 and summarizes any "sticky bit" information one might
+// have before calling round. z's mantissa must be normalized, with the msb set.
+func (z *Float) round(sbit uint) {
+ z.acc = Exact
+
+ // handle zero
+ m := uint(len(z.mant)) // mantissa length in words for current precision
+ if m == 0 {
+ z.exp = 0
+ return
+ }
+
+ if debugFloat {
+ z.validate()
+ }
+ // z.prec > 0
+
+ bits := m * _W // available mantissa bits
+ if bits == z.prec {
+ // mantissa fits Exactly => nothing to do
+ return
+ }
+
+ n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision
+ if bits < z.prec {
+ // mantissa too small => extend
+ if m < n {
+ // slice too short => extend slice
+ if int(n) <= cap(z.mant) {
+ // reuse existing slice
+ z.mant = z.mant[:n]
+ copy(z.mant[n-m:], z.mant[:m])
+ z.mant[:n-m].clear()
+ } else {
+ // n > cap(z.mant) => allocate new slice
+ const e = 4 // extra capacity (see nat.make)
+ new := make(nat, n, n+e)
+ copy(new[n-m:], z.mant)
+ }
+ }
+ return
+ }
+
+ // Rounding is based on two bits: the rounding bit (rbit) and the
+ // sticky bit (sbit). The rbit is the bit immediately before the
+ // mantissa bits (the "0.5"). The sbit is set if any of the bits
+ // before the rbit are set (the "0.25", "0.125", etc.):
+ //
+ // rbit sbit => "fractional part"
+ //
+ // 0 0 == 0
+ // 0 1 > 0 , < 0.5
+ // 1 0 == 0.5
+ // 1 1 > 0.5, < 1.0
+
+ // bits > z.prec: mantissa too large => round
+ r := bits - z.prec - 1 // rounding bit position; r >= 0
+ rbit := z.mant.bit(r) // rounding bit
+ if sbit == 0 {
+ sbit = z.mant.sticky(r)
+ }
+ if debugFloat && sbit&^1 != 0 {
+ panic(fmt.Sprintf("invalid sbit %#x", sbit))
+ }
+
+ // convert ToXInf rounding modes
+ mode := z.mode
+ switch mode {
+ case ToNegativeInf:
+ mode = ToZero
+ if z.neg {
+ mode = AwayFromZero
+ }
+ case ToPositiveInf:
+ mode = AwayFromZero
+ if z.neg {
+ mode = ToZero
+ }
+ }
+
+ // cut off extra words
+ if m > n {
+ copy(z.mant, z.mant[m-n:]) // move n last words to front
+ z.mant = z.mant[:n]
+ }
+
+ // determine number of trailing zero bits t
+ t := n*_W - z.prec // 0 <= t < _W
+ lsb := Word(1) << t
+
+ // make rounding decision
+ // TODO(gri) This can be simplified (see roundBits in float_test.go).
+ switch mode {
+ case ToZero:
+ // nothing to do
+ case ToNearestEven, ToNearestAway:
+ if rbit == 0 {
+ // rounding bits == 0b0x
+ mode = ToZero
+ } else if sbit == 1 {
+ // rounding bits == 0b11
+ mode = AwayFromZero
+ }
+ case AwayFromZero:
+ if rbit|sbit == 0 {
+ mode = ToZero
+ }
+ default:
+ // ToXInf modes have been converted to ToZero or AwayFromZero
+ panic("unreachable")
+ }
+
+ // round and determine accuracy
+ switch mode {
+ case ToZero:
+ if rbit|sbit != 0 {
+ z.acc = Below
+ }
+
+ case ToNearestEven, ToNearestAway:
+ if debugFloat && rbit != 1 {
+ panic("internal error in rounding")
+ }
+ if mode == ToNearestEven && sbit == 0 && z.mant[0]&lsb == 0 {
+ z.acc = Below
+ break
+ }
+ // mode == ToNearestAway || sbit == 1 || z.mant[0]&lsb != 0
+ fallthrough
+
+ case AwayFromZero:
+ // add 1 to mantissa
+ if addVW(z.mant, z.mant, lsb) != 0 {
+ // overflow => shift mantissa right by 1 and add msb
+ shrVU(z.mant, z.mant, 1)
+ z.mant[n-1] |= 1 << (_W - 1)
+ // adjust exponent
+ z.exp++
+ }
+ z.acc = Above
+ }
+
+ // zero out trailing bits in least-significant word
+ z.mant[0] &^= lsb - 1
+
+ // update accuracy
+ if z.neg {
+ z.acc = -z.acc
+ }
+
+ if debugFloat {
+ z.validate()
+ }
+
+ return
+}
+
+// Round sets z to the value of x rounded according to mode to prec bits and returns z.
+func (z *Float) Round(x *Float, prec uint, mode RoundingMode) *Float {
+ z.Set(x)
+ z.prec = prec
+ z.mode = mode
+ z.round(0)
+ return z
+}
+
+// nlz returns the number of leading zero bits in x.
+func nlz(x Word) uint {
+ return _W - uint(bitLen(x))
+}
+
+// TODO(gri) this assumes a Word is 64 bits
+func nlz64(x uint64) uint {
+ if _W != 64 {
+ panic("size mismatch")
+ }
+ return nlz(Word(x))
+}
+
+// SetUint64 sets z to x and returns z.
+// Precision is set to 64 bits.
+func (z *Float) SetUint64(x uint64) *Float {
+ z.neg = false
+ z.prec = 64
+ if x == 0 {
+ z.mant = z.mant[:0]
+ z.exp = 0
+ return z
+ }
+ s := nlz64(x)
+ z.mant = z.mant.setUint64(x << s)
+ z.exp = int32(64 - s)
+ return z
+}
+
+// SetInt64 sets z to x and returns z.
+// Precision is set to 64 bits.
+func (z *Float) SetInt64(x int64) *Float {
+ u := x
+ if u < 0 {
+ u = -u
+ }
+ z.SetUint64(uint64(u))
+ z.neg = x < 0
+ return z
+}
+
+// SetFloat64 sets z to x and returns z.
+// Precision is set to 53 bits.
+// TODO(gri) test denormals, +/-Inf, disallow NaN.
+func (z *Float) SetFloat64(x float64) *Float {
+ z.prec = 53
+ if x == 0 {
+ z.neg = false
+ z.mant = z.mant[:0]
+ z.exp = 0
+ return z
+ }
+ z.neg = x < 0
+ fmant, exp := math.Frexp(x) // get normalized mantissa
+ z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11)
+ z.exp = int32(exp)
+ return z
+}
+
+// fnorm normalizes mantissa m by shifting it to the left
+// such that the msb of the most-significant word (msw)
+// is 1. It returns the shift amount.
+// It assumes that m is not the zero nat.
+func fnorm(m nat) uint {
+ if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) {
+ panic("msw of mantissa is 0")
+ }
+ s := nlz(m[len(m)-1])
+ if s > 0 {
+ c := shlVU(m, m, s)
+ if debugFloat && c != 0 {
+ panic("nlz or shlVU incorrect")
+ }
+ }
+ return s
+}
+
+// SetInt sets z to x and returns z.
+// Precision is set to the number of bits required to represent x accurately.
+// TODO(gri) what about precision for x == 0?
+func (z *Float) SetInt(x *Int) *Float {
+ if len(x.abs) == 0 {
+ z.neg = false
+ z.mant = z.mant[:0]
+ z.exp = 0
+ // z.prec = ?
+ return z
+ }
+ // x != 0
+ z.neg = x.neg
+ z.mant = z.mant.set(x.abs)
+ e := uint(len(z.mant))*_W - fnorm(z.mant)
+ z.exp = int32(e)
+ z.prec = e
+ return z
+}
+
+// SetRat sets z to x rounded to the precision of z and returns z.
+func (z *Float) SetRat(x *Rat, prec uint) *Float {
+ panic("unimplemented")
+}
+
+// Set sets z to x, with the same precision as x, and returns z.
+func (z *Float) Set(x *Float) *Float {
+ if z != x {
+ z.neg = x.neg
+ z.exp = x.exp
+ z.mant = z.mant.set(x.mant)
+ z.prec = x.prec
+ }
+ return z
+}
+
+func high64(x nat) uint64 {
+ if len(x) == 0 {
+ return 0
+ }
+ v := uint64(x[len(x)-1])
+ if _W == 32 && len(x) > 1 {
+ v = v<<32 | uint64(x[len(x)-2])
+ }
+ return v
+}
+
+// TODO(gri) FIX THIS (rounding mode, errors, accuracy, etc.)
+func (x *Float) Uint64() uint64 {
+ m := high64(x.mant)
+ s := x.exp
+ if s >= 0 {
+ return m >> (64 - uint(s))
+ }
+ return 0 // imprecise
+}
+
+// TODO(gri) FIX THIS (rounding mode, errors, etc.)
+func (x *Float) Int64() int64 {
+ v := int64(x.Uint64())
+ if x.neg {
+ return -v
+ }
+ return v
+}
+
+// Float64 returns the closest float64 value of x
+// by rounding to nearest with 53 bits precision.
+// TODO(gri) implement/document error scenarios.
+func (x *Float) Float64() (float64, Accuracy) {
+ if len(x.mant) == 0 {
+ return 0, Exact
+ }
+ // x != 0
+ r := new(Float).Round(x, 53, ToNearestEven)
+ var s uint64
+ if r.neg {
+ s = 1 << 63
+ }
+ e := uint64(1022+r.exp) & 0x7ff // TODO(gri) check for overflow
+ m := high64(r.mant) >> 11 & (1<<52 - 1)
+ return math.Float64frombits(s | e<<52 | m), r.acc
+}
+
+func (x *Float) Int() *Int {
+ if len(x.mant) == 0 {
+ return new(Int)
+ }
+ panic("unimplemented")
+}
+
+func (x *Float) Rat() *Rat {
+ panic("unimplemented")
+}
+
+func (x *Float) IsInt() bool {
+ if len(x.mant) == 0 {
+ return true
+ }
+ if x.exp <= 0 {
+ return false
+ }
+ if uint(x.exp) >= x.prec {
+ return true
+ }
+ panic("unimplemented")
+}
+
+// Abs sets z to |x| (the absolute value of x) and returns z.
+// TODO(gri) should Abs (and Neg) below ignore z's precision and rounding mode?
+func (z *Float) Abs(x *Float) *Float {
+ z.Set(x)
+ z.neg = false
+ return z
+}
+
+// Neg sets z to x with its sign negated, and returns z.
+func (z *Float) Neg(x *Float) *Float {
+ z.Set(x)
+ z.neg = !z.neg
+ return z
+}
+
+// z = x + y, ignoring signs of x and y.
+// x and y must not be 0.
+func (z *Float) uadd(x, y *Float) {
+ if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
+ panic("uadd called with 0 argument")
+ }
+
+ // Note: This implementation requires 2 shifts most of the
+ // time. It is also inefficient if exponents or precisions
+ // differ by wide margins. The following article describes
+ // an efficient (but much more complicated) implementation
+ // compatible with the internal representation used here:
+ //
+ // Vincent Lefèvre: "The Generic Multiple-Precision Floating-
+ // Point Addition With Exact Rounding (as in the MPFR Library)"
+ // http://www.vinc17.net/research/papers/rnc6.pdf
+
+ // order x, y by magnitude
+ ex := int(x.exp) - len(x.mant)*_W
+ ey := int(y.exp) - len(y.mant)*_W
+ if ex < ey {
+ // + is commutative => ok to swap operands
+ x, y = y, x
+ ex, ey = ey, ex
+ }
+ // ex >= ey
+ d := uint(ex - ey)
+
+ // compute adjusted xmant
+ var n0 uint // nlz(z) before addition
+ xadj := x.mant
+ if d > 0 {
+ xadj = z.mant.shl(x.mant, d) // 1st shift
+ n0 = _W - d%_W
+ }
+ z.exp = x.exp
+
+ // add numbers
+ z.mant = z.mant.add(xadj, y.mant)
+
+ // normalize mantissa
+ n1 := fnorm(z.mant) // 2nd shift (often)
+
+ // adjust exponent if the result got longer (by at most 1 bit)
+ if n1 != n0 {
+ if debugFloat && (n1+1)%_W != n0 {
+ panic(fmt.Sprintf("carry is %d bits, expected at most 1 bit", n0-n1))
+ }
+ z.exp++
+ }
+
+ z.round(0)
+}
+
+// z = x - y for x >= y, ignoring signs of x and y.
+// x and y must not be zero.
+func (z *Float) usub(x, y *Float) {
+ if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
+ panic("usub called with 0 argument")
+ }
+
+ // Note: Like uadd, this implementation is often doing
+ // too much work and could be optimized by separating
+ // the various special cases.
+
+ // determine magnitude difference
+ ex := int(x.exp) - len(x.mant)*_W
+ ey := int(y.exp) - len(y.mant)*_W
+
+ if ex < ey {
+ panic("underflow")
+ }
+ // ex >= ey
+ d := uint(ex - ey)
+
+ // compute adjusted x.mant
+ var n uint // nlz(z) after adjustment
+ xadj := x.mant
+ if d > 0 {
+ xadj = z.mant.shl(x.mant, d)
+ n = _W - d%_W
+ }
+ e := int64(x.exp) + int64(n)
+
+ // subtract numbers
+ z.mant = z.mant.sub(xadj, y.mant)
+
+ if len(z.mant) != 0 {
+ e -= int64(len(xadj)-len(z.mant)) * _W
+
+ // normalize mantissa
+ z.setExp(e - int64(fnorm(z.mant)))
+ }
+
+ z.round(0)
+}
+
+// z = x * y, ignoring signs of x and y.
+// x and y must not be zero.
+func (z *Float) umul(x, y *Float) {
+ if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
+ panic("umul called with 0 argument")
+ }
+
+ // Note: This is doing too much work if the precision
+ // of z is less than the sum of the precisions of x
+ // and y which is often the case (e.g., if all floats
+ // have the same precision).
+ // TODO(gri) Optimize this for the common case.
+
+ e := int64(x.exp) + int64(y.exp)
+ z.mant = z.mant.mul(x.mant, y.mant)
+
+ // normalize mantissa
+ z.setExp(e - int64(fnorm(z.mant)))
+ z.round(0)
+}
+
+// z = x / y, ignoring signs of x and y.
+// x and y must not be zero.
+func (z *Float) uquo(x, y *Float) {
+ if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
+ panic("uquo called with 0 argument")
+ }
+
+ // mantissa length in words for desired result precision + 1
+ // (at least one extra bit so we get the rounding bit after
+ // the division)
+ n := int(z.prec/_W) + 1
+
+ // compute adjusted x.mant such that we get enough result precision
+ xadj := x.mant
+ if d := n - len(x.mant) + len(y.mant); d > 0 {
+ // d extra words needed => add d "0 digits" to x
+ xadj = make(nat, len(x.mant)+d)
+ copy(xadj[d:], x.mant)
+ }
+ // TODO(gri): If we have too many digits (d < 0), we should be able
+ // to shorten x for faster division. But we must be extra careful
+ // with rounding in that case.
+
+ // divide
+ var r nat
+ z.mant, r = z.mant.div(nil, xadj, y.mant)
+
+ // determine exponent
+ e := int64(x.exp) - int64(y.exp) - int64(len(xadj)-len(y.mant)-len(z.mant))*_W
+
+ // normalize mantissa
+ z.setExp(e - int64(fnorm(z.mant)))
+
+ // The result is long enough to include (at least) the rounding bit.
+ // If there's a non-zero remainder, the corresponding fractional part
+ // (if it were computed), would have a non-zero sticky bit (if it were
+ // zero, it couldn't have a non-zero remainder).
+ var sbit uint
+ if len(r) > 0 {
+ sbit = 1
+ }
+ z.round(sbit)
+}
+
+// ucmp returns -1, 0, or 1, depending on whether x < y, x == y, or x > y,
+// while ignoring the signs of x and y. x and y must not be zero.
+func (x *Float) ucmp(y *Float) int {
+ if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
+ panic("ucmp called with 0 argument")
+ }
+
+ switch {
+ case x.exp < y.exp:
+ return -1
+ case x.exp > y.exp:
+ return 1
+ }
+ // x.exp == y.exp
+
+ // compare mantissas
+ i := len(x.mant)
+ j := len(y.mant)
+ for i > 0 || j > 0 {
+ var xm, ym Word
+ if i > 0 {
+ i--
+ xm = x.mant[i]
+ }
+ if j > 0 {
+ j--
+ ym = y.mant[j]
+ }
+ switch {
+ case xm < ym:
+ return -1
+ case xm > ym:
+ return 1
+ }
+ }
+
+ return 0
+}
+
+// Handling of sign bit as defined by IEEE 754-2008,
+// section 6.3 (note that there are no NaN Floats):
+//
+// When neither the inputs nor result are NaN, the sign of a product or
+// quotient is the exclusive OR of the operands’ signs; the sign of a sum,
+// or of a difference x−y regarded as a sum x+(−y), differs from at most
+// one of the addends’ signs; and the sign of the result of conversions,
+// the quantize operation, the roundToIntegral operations, and the
+// roundToIntegralExact (see 5.3.1) is the sign of the first or only operand.
+// These rules shall apply even when operands or results are zero or infinite.
+//
+// When the sum of two operands with opposite signs (or the difference of
+// two operands with like signs) is exactly zero, the sign of that sum (or
+// difference) shall be +0 in all rounding-direction attributes except
+// roundTowardNegative; under that attribute, the sign of an exact zero
+// sum (or difference) shall be −0. However, x+x = x−(−x) retains the same
+// sign as x even when x is zero.
+
+// Add sets z to the rounded sum x+y and returns z.
+// Rounding is performed according to z's precision
+// and rounding mode; and z's accuracy reports the
+// result error relative to the exact (not rounded)
+// result.
+func (z *Float) Add(x, y *Float) *Float {
+ // TODO(gri) what about -0?
+ if len(y.mant) == 0 {
+ return z.Round(x, z.prec, z.mode)
+ }
+ if len(x.mant) == 0 {
+ return z.Round(y, z.prec, z.mode)
+ }
+
+ // x, y != 0
+ neg := x.neg
+ if x.neg == y.neg {
+ // x + y == x + y
+ // (-x) + (-y) == -(x + y)
+ z.uadd(x, y)
+ } else {
+ // x + (-y) == x - y == -(y - x)
+ // (-x) + y == y - x == -(x - y)
+ if x.ucmp(y) >= 0 {
+ z.usub(x, y)
+ } else {
+ neg = !neg
+ z.usub(y, x)
+ }
+ }
+ z.neg = neg
+ return z
+}
+
+// Sub sets z to the rounded difference x-y and returns z.
+// Rounding is performed according to z's precision
+// and rounding mode; and z's accuracy reports the
+// result error relative to the exact (not rounded)
+// result.
+func (z *Float) Sub(x, y *Float) *Float {
+ // TODO(gri) what about -0?
+ if len(y.mant) == 0 {
+ return z.Round(x, z.prec, z.mode)
+ }
+ if len(x.mant) == 0 {
+ prec := z.prec
+ mode := z.mode
+ z.Neg(y)
+ return z.Round(z, prec, mode)
+ }
+
+ // x, y != 0
+ neg := x.neg
+ if x.neg != y.neg {
+ // x - (-y) == x + y
+ // (-x) - y == -(x + y)
+ z.uadd(x, y)
+ } else {
+ // x - y == x - y == -(y - x)
+ // (-x) - (-y) == y - x == -(x - y)
+ if x.ucmp(y) >= 0 {
+ z.usub(x, y)
+ } else {
+ neg = !neg
+ z.usub(y, x)
+ }
+ }
+ z.neg = neg
+ return z
+}
+
+// Mul sets z to the rounded product x*y and returns z.
+// Rounding is performed according to z's precision
+// and rounding mode; and z's accuracy reports the
+// result error relative to the exact (not rounded)
+// result.
+func (z *Float) Mul(x, y *Float) *Float {
+ // TODO(gri) what about -0?
+ if len(x.mant) == 0 || len(y.mant) == 0 {
+ z.neg = false
+ z.mant = z.mant[:0]
+ z.exp = 0
+ z.acc = Exact
+ return z
+ }
+
+ // x, y != 0
+ z.umul(x, y)
+ z.neg = x.neg != y.neg
+ return z
+}
+
+// Quo sets z to the rounded quotient x/y and returns z.
+// If y == 0, a division-by-zero run-time panic occurs. TODO(gri) this should become Inf
+// Rounding is performed according to z's precision
+// and rounding mode; and z's accuracy reports the
+// result error relative to the exact (not rounded)
+// result.
+func (z *Float) Quo(x, y *Float) *Float {
+ // TODO(gri) what about -0?
+ if len(x.mant) == 0 {
+ z.neg = false
+ z.mant = z.mant[:0]
+ z.exp = 0
+ z.acc = Exact
+ return z
+ }
+ if len(y.mant) == 0 {
+ panic("division-by-zero") // TODO(gri) handle this better
+ }
+
+ // x, y != 0
+ z.uquo(x, y)
+ z.neg = x.neg != y.neg
+ return z
+}
+
+// Lsh sets z to the rounded x * (1<<s) and returns z.
+// Rounding is performed according to z's precision
+// and rounding mode; and z's accuracy reports the
+// result error relative to the exact (not rounded)
+// result.
+func (z *Float) Lsh(x *Float, s uint, mode RoundingMode) *Float {
+ z.Round(x, z.prec, mode)
+ z.setExp(int64(z.exp) + int64(s))
+ return z
+}
+
+// Rsh sets z to the rounded x / (1<<s) and returns z.
+// Rounding is performed according to z's precision
+// and rounding mode; and z's accuracy reports the
+// result error relative to the exact (not rounded)
+// result.
+func (z *Float) Rsh(x *Float, s uint, mode RoundingMode) *Float {
+ z.Round(x, z.prec, mode)
+ z.setExp(int64(z.exp) - int64(s))
+ return z
+}
+
+// Cmp compares x and y and returns:
+//
+// -1 if x < y
+// 0 if x == y (incl. -0 == 0)
+// +1 if x > y
+//
+func (x *Float) Cmp(y *Float) int {
+ // special cases
+ switch {
+ case len(x.mant) == 0:
+ // 0 cmp y == -sign(y)
+ return -y.Sign()
+ case len(y.mant) == 0:
+ // x cmp 0 == sign(x)
+ return x.Sign()
+ }
+ // x != 0 && y != 0
+
+ // x cmp y == x cmp y
+ // x cmp (-y) == 1
+ // (-x) cmp y == -1
+ // (-x) cmp (-y) == -(x cmp y)
+ switch {
+ case x.neg == y.neg:
+ r := x.ucmp(y)
+ if x.neg {
+ r = -r
+ }
+ return r
+ case x.neg:
+ return -1
+ default:
+ return 1
+ }
+ return 0
+}
+
+// Sign returns:
+//
+// -1 if x < 0
+// 0 if x == 0 (incl. x == -0)
+// +1 if x > 0
+//
+func (x *Float) Sign() int {
+ if len(x.mant) == 0 {
+ return 0
+ }
+ if x.neg {
+ return -1
+ }
+ return 1
+}
+
+func (x *Float) String() string {
+ return x.PString() // TODO(gri) fix this
+}
+
+// PString returns x as a string in the format ["-"] "0x" mantissa "p" exponent,
+// with a hexadecimal mantissa and a signed decimal exponent.
+func (x *Float) PString() string {
+ prefix := "0."
+ if x.neg {
+ prefix = "-0."
+ }
+ return prefix + x.mant.string(lowercaseDigits[:16]) + fmt.Sprintf("p%d", x.exp)
+}
+
+// SetString sets z to the value of s and returns z and a boolean indicating
+// success. s must be a floating-point number of the form:
+//
+// number = [ sign ] mantissa [ exponent ] .
+// mantissa = digits | digits "." [ digits ] | "." digits .
+// exponent = ( "E" | "e" | "p" ) [ sign ] digits .
+// sign = "+" | "-" .
+// digits = digit { digit } .
+// digit = "0" ... "9" .
+//
+// A "p" exponent indicates power of 2 for the exponent; for instance 1.2p3
+// is 1.2 * 2**3. If the operation failed, the value of z is undefined but
+// the returned value is nil.
+//
+func (z *Float) SetString(s string) (*Float, bool) {
+ r := strings.NewReader(s)
+
+ f, err := z.scan(r)
+ if err != nil {
+ return nil, false
+ }
+
+ // there should be no unread characters left
+ if _, _, err = r.ReadRune(); err != io.EOF {
+ return nil, false
+ }
+
+ return f, true
+}
+
+// scan sets z to the value of the longest prefix of r representing
+// a floating-point number and returns z or an error, if any.
+// The number must be of the form:
+//
+// number = [ sign ] mantissa [ exponent ] .
+// mantissa = digits | digits "." [ digits ] | "." digits .
+// exponent = ( "E" | "e" | "p" ) [ sign ] digits .
+// sign = "+" | "-" .
+// digits = digit { digit } .
+// digit = "0" ... "9" .
+//
+// A "p" exponent indicates power of 2 for the exponent; for instance 1.2p3
+// is 1.2 * 2**3. If the operation failed, the value of z is undefined but
+// the returned value is nil.
+//
+func (z *Float) scan(r io.RuneScanner) (f *Float, err error) {
+ // sign
+ z.neg, err = scanSign(r)
+ if err != nil {
+ return
+ }
+
+ // mantissa
+ var ecorr int // decimal exponent correction; valid if <= 0
+ z.mant, _, ecorr, err = z.mant.scan(r, 1)
+ if err != nil {
+ return
+ }
+
+ // exponent
+ var exp int64
+ var ebase int
+ exp, ebase, err = scanExponent(r)
+ if err != nil {
+ return
+ }
+ // special-case 0
+ if len(z.mant) == 0 {
+ z.exp = 0
+ return z, nil
+ }
+ // len(z.mant) > 0
+
+ // determine binary (exp2) and decimal (exp) exponent
+ exp2 := int64(len(z.mant)*_W - int(fnorm(z.mant)))
+ if ebase == 2 {
+ exp2 += exp
+ exp = 0
+ }
+ if ecorr < 0 {
+ exp += int64(ecorr)
+ }
+
+ z.setExp(exp2)
+ if exp == 0 {
+ // no decimal exponent
+ z.round(0)
+ return z, nil
+ }
+ // exp != 0
+
+ // compute decimal exponent power
+ expabs := exp
+ if expabs < 0 {
+ expabs = -expabs
+ }
+ powTen := new(Float).SetInt(new(Int).SetBits(nat(nil).expNN(natTen, nat(nil).setWord(Word(expabs)), nil)))
+
+ // correct result
+ if exp < 0 {
+ z.uquo(z, powTen)
+ } else {
+ z.umul(z, powTen)
+ }
+
+ return z, nil
+}
--- /dev/null
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package big
+
+import (
+ "fmt"
+ "sort"
+ "strconv"
+ "testing"
+)
+
+func fromBinary(s string) int64 {
+ x, err := strconv.ParseInt(s, 2, 64)
+ if err != nil {
+ panic(err)
+ }
+ return x
+}
+
+func toBinary(x int64) string {
+ return strconv.FormatInt(x, 2)
+}
+
+func testFloatRound(t *testing.T, x, r int64, prec uint, mode RoundingMode) {
+ // verify test data
+ var ok bool
+ switch mode {
+ case ToNearestEven, ToNearestAway:
+ ok = true // nothing to do for now
+ case ToZero:
+ if x < 0 {
+ ok = r >= x
+ } else {
+ ok = r <= x
+ }
+ case AwayFromZero:
+ if x < 0 {
+ ok = r <= x
+ } else {
+ ok = r >= x
+ }
+ case ToNegativeInf:
+ ok = r <= x
+ case ToPositiveInf:
+ ok = r >= x
+ default:
+ panic("unreachable")
+ }
+ if !ok {
+ t.Fatalf("incorrect test data for prec = %d, %s: x = %s, r = %s", prec, mode, toBinary(x), toBinary(r))
+ }
+
+ // compute expected accuracy
+ a := Exact
+ switch {
+ case r < x:
+ a = Below
+ case r > x:
+ a = Above
+ }
+
+ // round
+ f := new(Float).SetInt64(x)
+ f.Round(f, prec, mode)
+
+ // check result
+ r1 := f.Int64()
+ p1 := f.Precision()
+ a1 := f.Accuracy()
+ if r1 != r || p1 != prec || a1 != a {
+ t.Errorf("Round(%s, %d, %s): got %s (%d bits, %s); want %s (%d bits, %s)",
+ toBinary(x), prec, mode,
+ toBinary(r1), p1, a1,
+ toBinary(r), prec, a)
+ }
+}
+
+// TestFloatRound tests basic rounding.
+func TestFloatRound(t *testing.T) {
+ var tests = []struct {
+ prec uint
+ x, zero, neven, naway, away string // input, results rounded to prec bits
+ }{
+ {5, "1000", "1000", "1000", "1000", "1000"},
+ {5, "1001", "1001", "1001", "1001", "1001"},
+ {5, "1010", "1010", "1010", "1010", "1010"},
+ {5, "1011", "1011", "1011", "1011", "1011"},
+ {5, "1100", "1100", "1100", "1100", "1100"},
+ {5, "1101", "1101", "1101", "1101", "1101"},
+ {5, "1110", "1110", "1110", "1110", "1110"},
+ {5, "1111", "1111", "1111", "1111", "1111"},
+
+ {4, "1000", "1000", "1000", "1000", "1000"},
+ {4, "1001", "1001", "1001", "1001", "1001"},
+ {4, "1010", "1010", "1010", "1010", "1010"},
+ {4, "1011", "1011", "1011", "1011", "1011"},
+ {4, "1100", "1100", "1100", "1100", "1100"},
+ {4, "1101", "1101", "1101", "1101", "1101"},
+ {4, "1110", "1110", "1110", "1110", "1110"},
+ {4, "1111", "1111", "1111", "1111", "1111"},
+
+ {3, "1000", "1000", "1000", "1000", "1000"},
+ {3, "1001", "1000", "1000", "1010", "1010"},
+ {3, "1010", "1010", "1010", "1010", "1010"},
+ {3, "1011", "1010", "1100", "1100", "1100"},
+ {3, "1100", "1100", "1100", "1100", "1100"},
+ {3, "1101", "1100", "1100", "1110", "1110"},
+ {3, "1110", "1110", "1110", "1110", "1110"},
+ {3, "1111", "1110", "10000", "10000", "10000"},
+
+ {3, "1000001", "1000000", "1000000", "1000000", "1010000"},
+ {3, "1001001", "1000000", "1010000", "1010000", "1010000"},
+ {3, "1010001", "1010000", "1010000", "1010000", "1100000"},
+ {3, "1011001", "1010000", "1100000", "1100000", "1100000"},
+ {3, "1100001", "1100000", "1100000", "1100000", "1110000"},
+ {3, "1101001", "1100000", "1110000", "1110000", "1110000"},
+ {3, "1110001", "1110000", "1110000", "1110000", "10000000"},
+ {3, "1111001", "1110000", "10000000", "10000000", "10000000"},
+
+ {2, "1000", "1000", "1000", "1000", "1000"},
+ {2, "1001", "1000", "1000", "1000", "1100"},
+ {2, "1010", "1000", "1000", "1100", "1100"},
+ {2, "1011", "1000", "1100", "1100", "1100"},
+ {2, "1100", "1100", "1100", "1100", "1100"},
+ {2, "1101", "1100", "1100", "1100", "10000"},
+ {2, "1110", "1100", "10000", "10000", "10000"},
+ {2, "1111", "1100", "10000", "10000", "10000"},
+
+ {2, "1000001", "1000000", "1000000", "1000000", "1100000"},
+ {2, "1001001", "1000000", "1000000", "1000000", "1100000"},
+ {2, "1010001", "1000000", "1100000", "1100000", "1100000"},
+ {2, "1011001", "1000000", "1100000", "1100000", "1100000"},
+ {2, "1100001", "1100000", "1100000", "1100000", "10000000"},
+ {2, "1101001", "1100000", "1100000", "1100000", "10000000"},
+ {2, "1110001", "1100000", "10000000", "10000000", "10000000"},
+ {2, "1111001", "1100000", "10000000", "10000000", "10000000"},
+
+ {1, "1000", "1000", "1000", "1000", "1000"},
+ {1, "1001", "1000", "1000", "1000", "10000"},
+ {1, "1010", "1000", "1000", "1000", "10000"},
+ {1, "1011", "1000", "1000", "1000", "10000"},
+ {1, "1100", "1000", "10000", "10000", "10000"},
+ {1, "1101", "1000", "10000", "10000", "10000"},
+ {1, "1110", "1000", "10000", "10000", "10000"},
+ {1, "1111", "1000", "10000", "10000", "10000"},
+
+ {1, "1000001", "1000000", "1000000", "1000000", "10000000"},
+ {1, "1001001", "1000000", "1000000", "1000000", "10000000"},
+ {1, "1010001", "1000000", "1000000", "1000000", "10000000"},
+ {1, "1011001", "1000000", "1000000", "1000000", "10000000"},
+ {1, "1100001", "1000000", "10000000", "10000000", "10000000"},
+ {1, "1101001", "1000000", "10000000", "10000000", "10000000"},
+ {1, "1110001", "1000000", "10000000", "10000000", "10000000"},
+ {1, "1111001", "1000000", "10000000", "10000000", "10000000"},
+ }
+
+ for _, test := range tests {
+ x := fromBinary(test.x)
+ z := fromBinary(test.zero)
+ e := fromBinary(test.neven)
+ n := fromBinary(test.naway)
+ a := fromBinary(test.away)
+ prec := test.prec
+
+ testFloatRound(t, x, z, prec, ToZero)
+ testFloatRound(t, x, e, prec, ToNearestEven)
+ testFloatRound(t, x, n, prec, ToNearestAway)
+ testFloatRound(t, x, a, prec, AwayFromZero)
+
+ testFloatRound(t, x, z, prec, ToNegativeInf)
+ testFloatRound(t, x, a, prec, ToPositiveInf)
+
+ testFloatRound(t, -x, -a, prec, ToNegativeInf)
+ testFloatRound(t, -x, -z, prec, ToPositiveInf)
+ }
+}
+
+// TestFloatRound24 tests that rounding a float64 to 24 bits
+// matches IEEE-754 rounding to nearest when converting a
+// float64 to a float32.
+func TestFloatRound24(t *testing.T) {
+ const x0 = 1<<26 - 0x10 // 11...110000 (26 bits)
+ for d := 0; d <= 0x10; d++ {
+ x := float64(x0 + d)
+ f := new(Float).SetFloat64(x)
+ f.Round(f, 24, ToNearestEven)
+ got, _ := f.Float64()
+ want := float64(float32(x))
+ if got != want {
+ t.Errorf("Round(%g, 24) = %g; want %g", x, got, want)
+ }
+ }
+}
+
+func TestFloatSetUint64(t *testing.T) {
+ var tests = []uint64{
+ 0,
+ 1,
+ 2,
+ 10,
+ 100,
+ 1<<32 - 1,
+ 1 << 32,
+ 1<<64 - 1,
+ }
+ for _, want := range tests {
+ f := new(Float).SetUint64(want)
+ if got := f.Uint64(); got != want {
+ t.Errorf("got %d (%s); want %d", got, f.PString(), want)
+ }
+ }
+}
+
+func TestFloatSetInt64(t *testing.T) {
+ var tests = []int64{
+ 0,
+ 1,
+ 2,
+ 10,
+ 100,
+ 1<<32 - 1,
+ 1 << 32,
+ 1<<63 - 1,
+ }
+ for _, want := range tests {
+ for i := range [2]int{} {
+ if i&1 != 0 {
+ want = -want
+ }
+ f := new(Float).SetInt64(want)
+ if got := f.Int64(); got != want {
+ t.Errorf("got %d (%s); want %d", got, f.PString(), want)
+ }
+ }
+ }
+}
+
+func TestFloatSetFloat64(t *testing.T) {
+ var tests = []float64{
+ 0,
+ 1,
+ 2,
+ 12345,
+ 1e10,
+ 1e100,
+ 3.14159265e10,
+ 2.718281828e-123,
+ 1.0 / 3,
+ }
+ for _, want := range tests {
+ for i := range [2]int{} {
+ if i&1 != 0 {
+ want = -want
+ }
+ f := new(Float).SetFloat64(want)
+ if got, _ := f.Float64(); got != want {
+ t.Errorf("got %g (%s); want %g", got, f.PString(), want)
+ }
+ }
+ }
+}
+
+func TestFloatSetInt(t *testing.T) {
+ // TODO(gri) implement
+}
+
+// Selected precisions with which to run various tests.
+var precList = [...]uint{1, 2, 5, 8, 10, 16, 23, 24, 32, 50, 53, 64, 100, 128, 500, 511, 512, 513, 1000, 10000}
+
+// Selected bits with which to run various tests.
+// Each entry is a list of bits representing a floating-point number (see fromBits).
+var bitsList = [...][]int{
+ {}, // = 0
+ {0}, // = 1
+ {1}, // = 2
+ {-1}, // = 1/2
+ {10}, // = 2**10 == 1024
+ {-10}, // = 2**-10 == 1/1024
+ {100, 10, 1}, // = 2**100 + 2**10 + 2**1
+ {0, -1, -2, -10},
+ // TODO(gri) add more test cases
+}
+
+// TestFloatAdd tests Float.Add/Sub by comparing the result of a "manual"
+// addition/subtraction of arguments represented by bits lists with the
+// respective floating-point addition/subtraction for a variety of precisions
+// and rounding modes.
+func TestFloatAdd(t *testing.T) {
+ for _, xbits := range bitsList {
+ for _, ybits := range bitsList {
+ // exact values
+ x := fromBits(xbits...)
+ y := fromBits(ybits...)
+ zbits := append(xbits, ybits...)
+ z := fromBits(zbits...)
+
+ for i, mode := range [...]RoundingMode{ToZero, ToNearestEven, AwayFromZero} {
+ for _, prec := range precList {
+ // +
+ got := NewFloat(0, prec, mode)
+ got.Add(x, y)
+ want := roundBits(zbits, prec, mode)
+ if got.Cmp(want) != 0 {
+ t.Errorf("i = %d, prec = %d, %s:\n\t %s %v\n\t+ %s %v\n\t= %s\n\twant %s",
+ i, prec, mode, x, xbits, y, ybits, got, want)
+ return
+ }
+
+ // -
+ got.Sub(z, x)
+ want = roundBits(ybits, prec, mode)
+ if got.Cmp(want) != 0 {
+ t.Errorf("i = %d, prec = %d, %s:\n\t %s\n\t- %s\n\t= %s\n\twant %s",
+ i, prec, mode, x, y, got, want)
+ }
+ }
+ }
+ }
+ }
+}
+
+// TestFloatAdd32 tests that Float.Add/Sub of numbers with
+// 24bit mantissa behaves like float32 addition/subtraction.
+func TestFloatAdd32(t *testing.T) {
+ // chose base such that we cross the mantissa precision limit
+ const base = 1<<26 - 0x10 // 11...110000 (26 bits)
+ for d := 0; d <= 0x10; d++ {
+ for i := range [2]int{} {
+ x0, y0 := float64(base), float64(d)
+ if i&1 != 0 {
+ x0, y0 = y0, x0
+ }
+
+ x := new(Float).SetFloat64(x0)
+ y := new(Float).SetFloat64(y0)
+ z := NewFloat(0, 24, ToNearestEven)
+
+ z.Add(x, y)
+ got, acc := z.Float64()
+ want := float64(float32(y0) + float32(x0))
+ if got != want || acc != Exact {
+ t.Errorf("d = %d: %g + %g = %g (%s); want %g exactly", d, x0, y0, got, acc, want)
+ }
+
+ z.Sub(z, y)
+ got, acc = z.Float64()
+ want = float64(float32(want) - float32(y0))
+ if got != want || acc != Exact {
+ t.Errorf("d = %d: %g - %g = %g (%s); want %g exactly", d, x0+y0, y0, got, acc, want)
+ }
+ }
+ }
+}
+
+// TestFloatAdd64 tests that Float.Add/Sub of numbers with
+// 53bit mantissa behaves like float64 addition/subtraction.
+func TestFloatAdd64(t *testing.T) {
+ // chose base such that we cross the mantissa precision limit
+ const base = 1<<55 - 0x10 // 11...110000 (55 bits)
+ for d := 0; d <= 0x10; d++ {
+ for i := range [2]int{} {
+ x0, y0 := float64(base), float64(d)
+ if i&1 != 0 {
+ x0, y0 = y0, x0
+ }
+
+ x := new(Float).SetFloat64(x0)
+ y := new(Float).SetFloat64(y0)
+ z := NewFloat(0, 53, ToNearestEven)
+
+ z.Add(x, y)
+ got, acc := z.Float64()
+ want := x0 + y0
+ if got != want || acc != Exact {
+ t.Errorf("d = %d: %g + %g = %g; want %g exactly", d, x0, y0, got, acc, want)
+ }
+
+ z.Sub(z, y)
+ got, acc = z.Float64()
+ want -= y0
+ if got != want || acc != Exact {
+ t.Errorf("d = %d: %g - %g = %g; want %g exactly", d, x0+y0, y0, got, acc, want)
+ }
+ }
+ }
+}
+
+func TestFloatMul(t *testing.T) {
+}
+
+// TestFloatMul64 tests that Float.Mul/Quo of numbers with
+// 53bit mantissa behaves like float64 multiplication/division.
+func TestFloatMul64(t *testing.T) {
+ var tests = []struct {
+ x, y float64
+ }{
+ {0, 0},
+ {0, 1},
+ {1, 1},
+ {1, 1.5},
+ {1.234, 0.5678},
+ {2.718281828, 3.14159265358979},
+ {2.718281828e10, 3.14159265358979e-32},
+ {1.0 / 3, 1e200},
+ }
+ for _, test := range tests {
+ for i := range [8]int{} {
+ x0, y0 := test.x, test.y
+ if i&1 != 0 {
+ x0 = -x0
+ }
+ if i&2 != 0 {
+ y0 = -y0
+ }
+ if i&4 != 0 {
+ x0, y0 = y0, x0
+ }
+
+ x := new(Float).SetFloat64(x0)
+ y := new(Float).SetFloat64(y0)
+ z := NewFloat(0, 53, ToNearestEven)
+
+ z.Mul(x, y)
+ got, _ := z.Float64()
+ want := x0 * y0
+ if got != want {
+ t.Errorf("%g * %g = %g; want %g", x0, y0, got, want)
+ }
+
+ if y0 == 0 {
+ continue // avoid division-by-zero
+ }
+ z.Quo(z, y)
+ got, _ = z.Float64()
+ want /= y0
+ if got != want {
+ t.Errorf("%g / %g = %g; want %g", x0*y0, y0, got, want)
+ }
+ }
+ }
+}
+
+func TestIssue6866(t *testing.T) {
+ for _, prec := range precList {
+ two := NewFloat(2, prec, ToNearestEven)
+ one := NewFloat(1, prec, ToNearestEven)
+ three := NewFloat(3, prec, ToNearestEven)
+ msix := NewFloat(-6, prec, ToNearestEven)
+ psix := NewFloat(+6, prec, ToNearestEven)
+
+ p := NewFloat(0, prec, ToNearestEven)
+ z1 := NewFloat(0, prec, ToNearestEven)
+ z2 := NewFloat(0, prec, ToNearestEven)
+
+ // z1 = 2 + 1.0/3*-6
+ p.Quo(one, three)
+ p.Mul(p, msix)
+ z1.Add(two, p)
+
+ // z2 = 2 - 1.0/3*+6
+ p.Quo(one, three)
+ p.Mul(p, psix)
+ z2.Sub(two, p)
+
+ if z1.Cmp(z2) != 0 {
+ t.Fatalf("prec %d: got z1 = %s != z2 = %s; want z1 == z2\n", prec, z1, z2)
+ }
+ if z1.Sign() != 0 {
+ t.Errorf("prec %d: got z1 = %s; want 0", prec, z1)
+ }
+ if z2.Sign() != 0 {
+ t.Errorf("prec %d: got z2 = %s; want 0", prec, z2)
+ }
+ }
+}
+
+func TestFloatQuo(t *testing.T) {
+ // TODO(gri) make the test vary these precisions
+ preci := 200 // precision of integer part
+ precf := 20 // precision of fractional part
+
+ for i := 0; i < 8; i++ {
+ // compute accurate (not rounded) result z
+ bits := []int{preci - 1}
+ if i&3 != 0 {
+ bits = append(bits, 0)
+ }
+ if i&2 != 0 {
+ bits = append(bits, -1)
+ }
+ if i&1 != 0 {
+ bits = append(bits, -precf)
+ }
+ z := fromBits(bits...)
+
+ // compute accurate x as z*y
+ y := new(Float).SetFloat64(3.14159265358979323e123)
+
+ x := NewFloat(0, z.Precision()+y.Precision(), ToZero)
+ x.Mul(z, y)
+
+ // leave for debugging
+ // fmt.Printf("x = %s\ny = %s\nz = %s\n", x, y, z)
+
+ if got := x.Accuracy(); got != Exact {
+ t.Errorf("got acc = %s; want exact", got)
+ }
+
+ // round accurate z for a variety of precisions and
+ // modes and compare against result of x / y.
+ for _, mode := range [...]RoundingMode{ToZero, ToNearestEven, AwayFromZero} {
+ for d := -5; d < 5; d++ {
+ prec := uint(preci + d)
+ got := NewFloat(0, prec, mode).Quo(x, y)
+ want := roundBits(bits, prec, mode)
+ if got.Cmp(want) != 0 {
+ t.Errorf("i = %d, prec = %d, %s:\n\t %s\n\t/ %s\n\t= %s\n\twant %s",
+ i, prec, mode, x, y, got, want)
+ }
+ }
+ }
+ }
+}
+
+// normBits returns the normalized bits for x: It
+// removes multiple equal entries by treating them
+// as an addition (e.g., []int{5, 5} => []int{6}),
+// and it sorts the result list for reproducible
+// results.
+func normBits(x []int) []int {
+ m := make(map[int]bool)
+ for _, b := range x {
+ for m[b] {
+ m[b] = false
+ b++
+ }
+ m[b] = true
+ }
+ var z []int
+ for b, set := range m {
+ if set {
+ z = append(z, b)
+ }
+ }
+ sort.Ints(z)
+ return z
+}
+
+func TestNormBits(t *testing.T) {
+ var tests = []struct {
+ x, want []int
+ }{
+ {nil, nil},
+ {[]int{}, []int{}},
+ {[]int{0}, []int{0}},
+ {[]int{0, 0}, []int{1}},
+ {[]int{3, 1, 1}, []int{2, 3}},
+ {[]int{10, 9, 8, 7, 6, 6}, []int{11}},
+ }
+
+ for _, test := range tests {
+ got := fmt.Sprintf("%v", normBits(test.x))
+ want := fmt.Sprintf("%v", test.want)
+ if got != want {
+ t.Errorf("normBits(%v) = %s; want %s", test.x, got, want)
+ }
+
+ }
+}
+
+// roundBits returns the Float value rounded to prec bits
+// according to mode from the bit set x.
+func roundBits(x []int, prec uint, mode RoundingMode) *Float {
+ x = normBits(x)
+
+ // determine range
+ var min, max int
+ for i, b := range x {
+ if i == 0 || b < min {
+ min = b
+ }
+ if i == 0 || b > max {
+ max = b
+ }
+ }
+ prec0 := uint(max + 1 - min)
+ if prec >= prec0 {
+ return fromBits(x...)
+ }
+ // prec < prec0
+
+ // determine bit 0, rounding, and sticky bit, and result bits z
+ var bit0, rbit, sbit uint
+ var z []int
+ r := max - int(prec)
+ for _, b := range x {
+ switch {
+ case b == r:
+ rbit = 1
+ case b < r:
+ sbit = 1
+ default:
+ // b > r
+ if b == r+1 {
+ bit0 = 1
+ }
+ z = append(z, b)
+ }
+ }
+
+ // round
+ f := fromBits(z...) // rounded to zero
+ if mode == ToNearestAway {
+ panic("not yet implemented")
+ }
+ if mode == ToNearestEven && rbit == 1 && (sbit == 1 || sbit == 0 && bit0 != 0) || mode == AwayFromZero {
+ // round away from zero
+ f.Round(f, prec, ToZero) // extend precision // TODO(gri) better approach?
+ f.Add(f, fromBits(int(r)+1))
+ }
+ return f
+}
+
+// fromBits returns the *Float z of the smallest possible precision
+// such that z = sum(2**bits[i]), with i = range bits.
+// If multiple bits[i] are equal, they are added: fromBits(0, 1, 0)
+// == 2**1 + 2**0 + 2**0 = 4.
+func fromBits(bits ...int) *Float {
+ // handle 0
+ if len(bits) == 0 {
+ return new(Float)
+ // z.prec = ?
+ }
+ // len(bits) > 0
+
+ // determine lsb exponent
+ var min int
+ for i, b := range bits {
+ if i == 0 || b < min {
+ min = b
+ }
+ }
+
+ // create bit pattern
+ x := NewInt(0)
+ for _, b := range bits {
+ badj := b - min
+ // propagate carry if necessary
+ for x.Bit(badj) != 0 {
+ x.SetBit(x, badj, 0)
+ badj++
+ }
+ x.SetBit(x, badj, 1)
+ }
+
+ // create corresponding float
+ z := new(Float).SetInt(x) // normalized
+ z.setExp(int64(z.exp) + int64(min))
+ return z
+}
+
+func TestFromBits(t *testing.T) {
+ var tests = []struct {
+ bits []int
+ want string
+ }{
+ // all different bit numbers
+ {nil, "0.0p0"},
+ {[]int{0}, "0.8000000000000000p1"},
+ {[]int{1}, "0.8000000000000000p2"},
+ {[]int{-1}, "0.8000000000000000p0"},
+ {[]int{63}, "0.8000000000000000p64"},
+ {[]int{33, -30}, "0.8000000000000001p34"},
+ {[]int{255, 0}, "0.8000000000000000000000000000000000000000000000000000000000000001p256"},
+
+ // multiple equal bit numbers
+ {[]int{0, 0}, "0.8000000000000000p2"},
+ {[]int{0, 0, 0, 0}, "0.8000000000000000p3"},
+ {[]int{0, 1, 0}, "0.8000000000000000p3"},
+ {append([]int{2, 1, 0} /* 7 */, []int{3, 1} /* 10 */ ...), "0.8800000000000000p5" /* 17 */},
+ }
+
+ for _, test := range tests {
+ f := fromBits(test.bits...)
+ if got := f.PString(); got != test.want {
+ t.Errorf("setBits(%v) = %s; want %s", test.bits, got, test.want)
+ }
+ }
+}
+
+var floatSetFloat64StringTests = []struct {
+ s string
+ x float64
+}{
+ {"0", 0},
+ {"-0", -0},
+ {"+0", 0},
+ {"1", 1},
+ {"-1", -1},
+ {"+1", 1},
+ {"1.234", 1.234},
+ {"-1.234", -1.234},
+ {"+1.234", 1.234},
+ {".1", 0.1},
+ {"1.", 1},
+ {"+1.", 1},
+
+ {"0e100", 0},
+ {"-0e+100", 0},
+ {"+0e-100", 0},
+ {"0E100", 0},
+ {"-0E+100", 0},
+ {"+0E-100", 0},
+ {"0p100", 0},
+ {"-0p+100", 0},
+ {"+0p-100", 0},
+
+ {"1.e10", 1e10},
+ {"1e+10", 1e10},
+ {"+1e-10", 1e-10},
+ {"1E10", 1e10},
+ {"1.E+10", 1e10},
+ {"+1E-10", 1e-10},
+ {"1p10", 1 << 10},
+ {"1p+10", 1 << 10},
+ {"+1.p-10", 1.0 / (1 << 10)},
+
+ {"-687436.79457e-245", -687436.79457e-245},
+ {"-687436.79457E245", -687436.79457e245},
+ {"1024.p-12", 0.25},
+ {"-1.p10", -1024},
+ {"0.25p2", 1},
+
+ {".0000000000000000000000000000000000000001", 1e-40},
+ {"+10000000000000000000000000000000000000000e-0", 1e40},
+}
+
+func TestFloatSetFloat64String(t *testing.T) {
+ for _, test := range floatSetFloat64StringTests {
+ var x Float
+ x.prec = 53 // TODO(gri) find better solution
+ _, ok := x.SetString(test.s)
+ if !ok {
+ t.Errorf("%s: parse error", test.s)
+ continue
+ }
+ f, _ := x.Float64()
+ want := new(Float).SetFloat64(test.x)
+ if x.Cmp(want) != 0 {
+ t.Errorf("%s: got %s (%v); want %v", test.s, &x, f, test.x)
+ }
+ }
+}
+
+func TestFloatPString(t *testing.T) {
+ var tests = []struct {
+ x Float
+ want string
+ }{
+ {Float{}, "0.0p0"},
+ {Float{neg: true}, "-0.0p0"},
+ {Float{mant: nat{0x87654321}}, "0.87654321p0"},
+ {Float{mant: nat{0x87654321}, exp: -10}, "0.87654321p-10"},
+ }
+ for _, test := range tests {
+ if got := test.x.PString(); got != test.want {
+ t.Errorf("%v: got %s; want %s", test.x, got, test.want)
+ }
+ }
+}