//
// Each Float value also has a precision, rounding mode, and accuracy.
//
-// The precision is the (maximum) number of mantissa bits available to
+// The precision is the maximum number of mantissa bits available to
// represent the value. The rounding mode specifies how a result should
// be rounded to fit into the mantissa bits, and accuracy describes the
// rounding error with respect to the exact result.
// and according to its rounding mode, unless specified otherwise. If the
// result precision is 0 (see below), it is set to the precision of the
// argument with the largest precision value before any rounding takes
-// place.
-// TODO(gri) should the rounding mode also be copied in this case?
+// place. The rounding mode remains unchanged, thus uninitialized Floats
+// provided as result arguments will "inherit" a reasonble precision from
+// the incoming arguments and their mode is the zero value for RoundingMode
+// (ToNearestEven).
//
// By setting the desired precision to 24 or 53 and using ToNearestEven
// rounding, Float operations produce the same results as the corresponding
// of the the Word size _W, x.mant[0] has trailing zero bits. Zero and Inf
// values have an empty mantissa and a 0 or infExp exponent, respectively.
-// NewFloat returns a new Float with value x rounded
-// to prec bits according to the given rounding mode.
-// If prec == 0, the result has value 0.0 independent
-// of the value of x.
-// BUG(gri) For prec == 0 and x == Inf, the result
-// should be Inf as well.
-// TODO(gri) rethink this signature.
-func NewFloat(x float64, prec uint, mode RoundingMode) *Float {
- var z Float
- if prec > 0 {
- // TODO(gri) should make this more efficient
- z.SetFloat64(x)
- return z.Round(&z, prec, mode)
- }
- z.mode = mode // TODO(gri) don't do this twice for prec > 0
- return &z
-}
-
const (
MaxExp = math.MaxInt32 // largest supported exponent magnitude
infExp = -MaxExp - 1 // exponent for Inf values
panic("unreachable")
}
-// Precision returns the mantissa precision of x in bits.
-// The precision may be 0 for |x| == 0 or |x| == Inf.
-func (x *Float) Precision() uint {
+// SetPrec sets z's precision to prec and returns the (possibly) rounded
+// value of z. Rounding occurs according to z's rounding mode if the mantissa
+// cannot be represented in prec bits without loss of precision.
+func (z *Float) SetPrec(prec uint) *Float {
+ old := z.prec
+ z.prec = prec
+ if prec < old {
+ z.round(0)
+ }
+ return z
+}
+
+// SetMode sets z's rounding mode to mode and returns z.
+// z remains unchanged otherwise.
+func (z *Float) SetMode(mode RoundingMode) *Float {
+ z.mode = mode
+ return z
+}
+
+// Prec returns the mantissa precision of x in bits.
+// The result may be 0 for |x| == 0 or |x| == Inf.
+func (x *Float) Prec() uint {
return uint(x.prec)
}
-// Accuracy returns the accuracy of x produced by the most recent operation.
-func (x *Float) Accuracy() Accuracy {
+// Acc returns the accuracy of x produced by the most recent operation.
+func (x *Float) Acc() Accuracy {
return x.acc
}
}
// zero value has precision 0
- if prec := x.Precision(); prec != 0 {
+ if prec := x.Prec(); prec != 0 {
t.Errorf("prec = %d; want 0", prec)
}
// zero value can be used in any and all positions of binary operations
make := func(x int) *Float {
- if x == 0 {
- return new(Float) // 0 translates into the zero value
+ var f Float
+ if x != 0 {
+ f.SetInt64(int64(x))
}
- return NewFloat(float64(x), 10, 0)
+ // x == 0 translates into the zero value
+ return &f
}
for _, test := range []struct {
z, x, y, want int
return NewInf(-1)
}
var x Float
- x.prec = 1000 // TODO(gri) find a better way to do this
+ x.SetPrec(1000)
if _, ok := x.SetString(s); !ok {
panic(fmt.Sprintf("%q is not a valid float", s))
}
// check result
r1 := f.int64()
- p1 := f.Precision()
- a1 := f.Accuracy()
+ p1 := f.Prec()
+ a1 := f.Acc()
if r1 != r || p1 != prec || a1 != a {
t.Errorf("Round(%s, %d, %s): got %s (%d bits, %s); want %s (%d bits, %s)",
toBinary(x), prec, mode,
// test basic rounding behavior (exhaustive rounding testing is done elsewhere)
const x uint64 = 0x8765432187654321 // 64 bits needed
for prec := uint(1); prec <= 64; prec++ {
- f := NewFloat(0, prec, ToZero).SetUint64(x)
+ f := new(Float).SetPrec(prec).SetMode(ToZero).SetUint64(x)
got := f.uint64()
want := x &^ (1<<(64-prec) - 1) // cut off (round to zero) low 64-prec bits
if got != want {
// test basic rounding behavior (exhaustive rounding testing is done elsewhere)
const x int64 = 0x7654321076543210 // 63 bits needed
for prec := uint(1); prec <= 63; prec++ {
- f := NewFloat(0, prec, ToZero).SetInt64(x)
+ f := new(Float).SetPrec(prec).SetMode(ToZero).SetInt64(x)
got := f.int64()
want := x &^ (1<<(63-prec) - 1) // cut off (round to zero) low 63-prec bits
if got != want {
// test basic rounding behavior (exhaustive rounding testing is done elsewhere)
const x uint64 = 0x8765432143218 // 53 bits needed
for prec := uint(1); prec <= 52; prec++ {
- f := NewFloat(0, prec, ToZero).SetFloat64(float64(x))
+ f := new(Float).SetPrec(prec).SetMode(ToZero).SetFloat64(float64(x))
got, _ := f.Float64()
want := float64(x &^ (1<<(52-prec) - 1)) // cut off (round to zero) low 53-prec bits
if got != want {
if n < 64 {
n = 64
}
- if prec := f.Precision(); prec != uint(n) {
+ if prec := f.Prec(); prec != uint(n) {
t.Errorf("got prec = %d; want %d", prec, n)
}
}
n := max(x.Num().BitLen(), x.Denom().BitLen())
- var f1 Float
- var f2 = NewFloat(0, 1000, 0) // set a high precision - TODO(gri) find a cleaner way
+ var f1, f2 Float
+ f2.SetPrec(1000)
f1.SetRat(&x)
f2.SetRat(&x)
if n < 64 {
n = 64
}
- if prec := f1.Precision(); prec != uint(n) {
+ if prec := f1.Prec(); prec != uint(n) {
t.Errorf("got prec = %d; want %d", prec, n)
}
continue // prec must be large enough to hold all numbers from 0 to n
}
var x, one Float
- x.prec = prec
+ x.SetPrec(prec)
one.SetInt64(1)
for i := 0; i < n; i++ {
x.Add(&x, &one)
for i, mode := range [...]RoundingMode{ToZero, ToNearestEven, AwayFromZero} {
for _, prec := range precList {
- got := NewFloat(0, prec, mode)
+ got := new(Float).SetPrec(prec).SetMode(mode)
got.Add(x, y)
want := roundBits(zbits, prec, mode)
if got.Cmp(want) != 0 {
x := new(Float).SetFloat64(x0)
y := new(Float).SetFloat64(y0)
- z := NewFloat(0, 24, ToNearestEven)
+ z := new(Float).SetPrec(24)
z.Add(x, y)
got, acc := z.Float64()
x := new(Float).SetFloat64(x0)
y := new(Float).SetFloat64(y0)
- z := NewFloat(0, 53, ToNearestEven)
+ z := new(Float).SetPrec(53)
z.Add(x, y)
got, acc := z.Float64()
x := new(Float).SetFloat64(x0)
y := new(Float).SetFloat64(y0)
- z := NewFloat(0, 53, ToNearestEven)
+ z := new(Float).SetPrec(53)
z.Mul(x, y)
got, _ := z.Float64()
func TestIssue6866(t *testing.T) {
for _, prec := range precList {
- two := NewFloat(2, prec, ToNearestEven)
- one := NewFloat(1, prec, ToNearestEven)
- three := NewFloat(3, prec, ToNearestEven)
- msix := NewFloat(-6, prec, ToNearestEven)
- psix := NewFloat(+6, prec, ToNearestEven)
+ two := new(Float).SetPrec(prec).SetInt64(2)
+ one := new(Float).SetPrec(prec).SetInt64(1)
+ three := new(Float).SetPrec(prec).SetInt64(3)
+ msix := new(Float).SetPrec(prec).SetInt64(-6)
+ psix := new(Float).SetPrec(prec).SetInt64(+6)
- p := NewFloat(0, prec, ToNearestEven)
- z1 := NewFloat(0, prec, ToNearestEven)
- z2 := NewFloat(0, prec, ToNearestEven)
+ p := new(Float).SetPrec(prec)
+ z1 := new(Float).SetPrec(prec)
+ z2 := new(Float).SetPrec(prec)
// z1 = 2 + 1.0/3*-6
p.Quo(one, three)
// compute accurate x as z*y
y := new(Float).SetFloat64(3.14159265358979323e123)
- x := NewFloat(0, z.Precision()+y.Precision(), ToZero)
+ x := new(Float).SetPrec(z.Prec() + y.Prec()).SetMode(ToZero)
x.Mul(z, y)
// leave for debugging
// fmt.Printf("x = %s\ny = %s\nz = %s\n", x, y, z)
- if got := x.Accuracy(); got != Exact {
+ if got := x.Acc(); got != Exact {
t.Errorf("got acc = %s; want exact", got)
}
for _, mode := range [...]RoundingMode{ToZero, ToNearestEven, AwayFromZero} {
for d := -5; d < 5; d++ {
prec := uint(preci + d)
- got := NewFloat(0, prec, mode).Quo(x, y)
+ got := new(Float).SetPrec(prec).SetMode(mode).Quo(x, y)
want := roundBits(bits, prec, mode)
if got.Cmp(want) != 0 {
t.Errorf("i = %d, prec = %d, %s:\n\t %s\n\t/ %s\n\t= %s\n\twant %s",
// vary operand precision (only ok as long as a, b can be represented correctly)
for ad := -dprec; ad <= dprec; ad++ {
for bd := -dprec; bd <= dprec; bd++ {
- A := NewFloat(a, uint(prec+ad), 0)
- B := NewFloat(b, uint(prec+bd), 0)
- C := NewFloat(0, 53, 0).Quo(A, B) // C has float64 mantissa width
+ A := new(Float).SetPrec(uint(prec + ad)).SetFloat64(a)
+ B := new(Float).SetPrec(uint(prec + bd)).SetFloat64(b)
+ C := new(Float).SetPrec(53).Quo(A, B) // C has float64 mantissa width
cc, acc := C.Float64()
if cc != c {
}
if mode == ToNearestEven && rbit == 1 && (sbit == 1 || sbit == 0 && bit0 != 0) || mode == AwayFromZero {
// round away from zero
- f.Round(f, prec, ToZero) // extend precision // TODO(gri) better approach?
+ f.SetMode(ToZero).SetPrec(prec)
f.Add(f, fromBits(int(r)+1))
}
return f
// handle 0
if len(bits) == 0 {
return new(Float)
- // z.prec = ?
}
// len(bits) > 0