This chooses saturating behavior for over/underflow.
Change-Id: I96a33ef73feacdafe8310f893de445060bc1a536
Reviewed-on: https://go-review.googlesource.com/c/go/+/709595
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
var Fcmp64 = fcmp64
var Fintto64 = fintto64
var F64toint = f64toint
+var F64touint = f64touint64
var Entersyscall = entersyscall
var Exitsyscall = exitsyscall
neg32 uint32 = 1 << (expbits32 + mantbits32)
)
+// If f is not NaN and not Inf, then f == (-1)**sign * mantissa * 2**(exp-52).
+// The mantissa and exp are adjusted from their stored representation so
+// that the mantissa includes the formerly implicit 1, the exponent bias
+// is removed, and denormalized floats are adjusted to put a 1 in the
+// expected (1<<mantbits64) position.
func funpack64(f uint64) (sign, mant uint64, exp int, inf, nan bool) {
sign = f & (1 << (mantbits64 + expbits64))
mant = f & (1<<mantbits64 - 1)
return 0, false
}
-func f64toint(f uint64) (val int64, ok bool) {
+// f64toint returns the saturated-conversion int64 value of f. Despite its name,
+// the isNan result is false for NaN and for |f| < 0.5, and true otherwise; for
+// NaN the returned value may not match the "hardware" conversion.
+func f64toint(f uint64) (val int64, isNan bool) {
fs, fm, fe, fi, fn := funpack64(f)
switch {
- case fi, fn: // NaN
- return 0, false
+
+ case fn: // NaN
+ return -0x8000_0000_0000_0000, false
case fe < -1: // f < 0.5
return 0, false
- case fe > 63: // f >= 2^63
- if fs != 0 && fm == 0 { // f == -2^63
- return -1 << 63, true
- }
+ case fi || fe >= 63: // |f| >= 2^63, including infinity
if fs != 0 {
- return 0, false
+ return -0x8000_0000_0000_0000, true
}
- return 0, false
+ return 0x7fff_ffff_ffff_ffff, true
}
for fe > int(mantbits64) {
fm >>= 1
}
val = int64(fm)
+ if val < 0 {
+ if fs != 0 {
+ return -0x8000_0000_0000_0000, true
+ }
+ return 0x7fff_ffff_ffff_ffff, true
+ }
if fs != 0 {
val = -val
}
return val, true
}
+// f64touint returns the saturated-conversion uint64 value of f. Despite its
+// name, the isNan result is false when f is NaN and true otherwise; for NaN
+// the returned value (all ones) may not match the "hardware" conversion.
+func f64touint(f uint64) (val uint64, isNan bool) {
+ fs, fm, fe, fi, fn := funpack64(f)
+
+ switch {
+
+ case fn: // NaN
+ return 0xffff_ffff_ffff_ffff, false
+
+ case fs != 0: // all negative, including -Inf, are zero
+ return 0, true
+
+ case fi || fe >= 64: // positive infinity or f >= 2^64
+ return 0xffff_ffff_ffff_ffff, true
+
+ case fe < -1: // f < 0.5
+ return 0, true
+ }
+
+ for fe > int(mantbits64) { // scale up: each left shift is paired with fe-- until fe == mantbits64
+ fe--
+ fm <<= 1
+ }
+ for fe < int(mantbits64) { // scale down: right shifts drop low mantissa bits (truncation)
+ fe++
+ fm >>= 1
+ }
+ val = fm
+ return val, true
+}
+
func fintto64(val int64) (f uint64) {
fs := uint64(val) & (1 << 63)
mant := uint64(val)
func f32toint32(x uint32) int32 {
val, _ := f64toint(f32to64(x))
+ if val >= 0x7fffffff { // clamp to the largest int32
+ return 0x7fffffff
+ }
+ if val < -0x80000000 { // clamp to the smallest int32; also catches f64toint's NaN result
+ return -0x80000000
+ }
return int32(val)
}
func f64toint32(x uint64) int32 {
val, _ := f64toint(x)
+ if val >= 0x7fffffff { // clamp to the largest int32
+ return 0x7fffffff
+ }
+ if val < -0x80000000 { // clamp to the smallest int32; also catches f64toint's NaN result
+ return -0x80000000
+ }
return int32(val)
}
}
func f64touint64(x uint64) uint64 {
- var m uint64 = 0x43e0000000000000 // float64 1<<63
- if fgt64(m, x) {
- return uint64(f64toint64(x))
- }
- y := fadd64(x, -m)
- z := uint64(f64toint64(y))
- return z | (1 << 63)
+ val, _ := f64touint(x) // second result unused: NaN already yields f64touint's all-ones value
+ return val
}
func f32touint64(x uint32) uint64 {
- var m uint32 = 0x5f000000 // float32 1<<63
- if fgt32(m, x) {
- return uint64(f32toint64(x))
- }
- y := fadd32(x, -m)
- z := uint64(f32toint64(y))
- return z | (1 << 63)
+ val, _ := f64touint(f32to64(x)) // convert via f32to64, then reuse f64touint; its second result is unused
+ return val
}
func fuint64to64(x uint64) uint64 {
func TestFloat64(t *testing.T) {
base := []float64{
0,
+ 1,
+ -9223372036854775808,
+ -9223372036854775808 + 4096,
+ 18446744073709551615,
+ 18446744073709551615 + 1,
+ 18446744073709551615 - 1,
+ 9223372036854775808 + 4096,
+ 0.5,
+ 0.75,
math.Copysign(0, -1),
-1,
1,
math.Inf(+1),
math.Inf(-1),
0.1,
+ 0.5,
+ 0.75,
1.5,
1.9999999999999998, // all 1s mantissa
1.3333333333333333, // 1.010101010101...
1e+307,
1e+308,
}
- all := make([]float64, 200)
+ all := make([]float64, 250)
copy(all, base)
for i := len(base); i < len(all); i++ {
all[i] = rand.NormFloat64()
test(t, "*", mul, fop(Fmul64), all)
test(t, "/", div, fop(Fdiv64), all)
}
+
}
// 64 -hw-> 32 -hw-> 64
return float64(int64(f))
}
+// float64 -hw-> uint64 -hw-> float64
+func hwuint64(f float64) float64 {
+ return float64(uint64(f))
+}
+
// float64 -hw-> int32 -hw-> float64
func hwint32(f float64) float64 {
return float64(int32(f))
func toint64sw(f float64) float64 {
i, ok := F64toint(math.Float64bits(f))
if !ok {
- // There's no right answer for out of range.
+ // There's no right answer for NaN.
// Match the hardware to pass the test.
i = int64(f)
}
return float64(i)
}
+func touint64sw(f float64) float64 {
+ i := F64touint(math.Float64bits(f)) // F64touint aliases f64touint64, which returns only the uint64 value
+ if f != f { // f != f holds only when f is NaN
+ // There's no right answer for NaN.
+ // Match the hardware to pass the test.
+ i = uint64(f)
+ }
+ return float64(i)
+}
+
// float64 -hw-> int64 -sw-> float64
func fromint64sw(f float64) float64 {
return math.Float64frombits(Fintto64(int64(f)))
testu(t, "to32", trunc32, to32sw, h)
testu(t, "to64", trunc32, to64sw, h)
testu(t, "toint64", hwint64, toint64sw, h)
+ testu(t, "touint64", hwuint64, touint64sw, h)
testu(t, "fromint64", hwint64, fromint64sw, h)
testcmp(t, f, h)
testcmp(t, h, f)
h := hw(v)
s := sw(v)
if !same(h, s) {
+ s = sw(v) // debug me
err(t, "%s %g = sw %g, hw %g\n", op, v, s, h)
}
}
p64_plus4k_plus1 := id(float64(p64 + 4096 + 1)) // want this to be precise and fit in 53 bits mantissa
n32_minus4k := id(float32(n32 - 4096))
n64_minus4k := id(float64(n64 - 4096))
+ n32_plus4k := id(float32(n32 + 4096))
+ n64_plus4k := id(float64(n64 + 4096))
inf_32 := id(float32(one / 0))
inf_64 := id(float64(one / 0))
ninf_32 := id(float32(-one / 0))
{"p64_plus4k_plus1", p64_plus4k_plus1, p32},
{"n32_minus4k", n32_minus4k, n32},
{"n64_minus4k", n64_minus4k, n32},
+ {"n32_plus4k", n32_plus4k, n32 + 4096},
{"inf_32", inf_32, p32},
{"inf_64", inf_64, p32},
{"ninf_32", ninf_32, n32},
{"p64_plus4k_plus1", p64_plus4k_plus1, p64},
{"n32_minus4k", n32_minus4k, n32 - 4096},
{"n64_minus4k", n64_minus4k, n64},
+ {"n32_plus4k", n32_plus4k, n32 + 4096},
+ {"n64_plus4k", n64_plus4k, n64 + 4096},
{"inf_32", inf_32, p64},
{"inf_64", inf_64, p64},
{"ninf_32", ninf_32, n64},