runtime: adjust softfloat corner cases to match amd64/arm64

author David Chase <drchase@google.com>

Mon, 6 Oct 2025 19:01:03 +0000 (15:01 -0400)

committer David Chase <drchase@google.com>

Thu, 9 Oct 2025 15:23:39 +0000 (08:23 -0700)
author David Chase <drchase@google.com>
Mon, 6 Oct 2025 19:01:03 +0000 (15:01 -0400)
committer David Chase <drchase@google.com>
Thu, 9 Oct 2025 15:23:39 +0000 (08:23 -0700)
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go

index 9f2fcacc30ee5cdbdf9f186205e9d28252968097..f61cac763cef3d44d96ef798184c9004a2a6f5bd 100644 (file)
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -25,6 +25,7 @@ var F32to64 = f32to64
  var Fcmp64 = fcmp64
  var Fintto64 = fintto64
  var F64toint = f64toint
+var F64touint = f64touint64
  
  var Entersyscall = entersyscall
  var Exitsyscall = exitsyscall
diff --git a/src/runtime/softfloat64.go b/src/runtime/softfloat64.go

index 42ef0092970b3e2525b65fa1c4cf5559d51b44d7..7b9409f75be380945f8b19466a7580dfab46189d 100644 (file)
--- a/src/runtime/softfloat64.go
+++ b/src/runtime/softfloat64.go
@@ -26,6 +26,11 @@ const (
         neg32 uint32 = 1 << (expbits32 + mantbits32)
  )
  
+// If f is not NaN and not Inf, then f == (-1)**sign * mantissa * 2**(exp-52).
+// The mantissa and exp are adjusted from their stored representation so
+// that the mantissa includes the formerly implicit 1, the exponent bias
+// is removed, and denormalized floats are adjusted to put a 1 in the
+// expected (1<<mantbits64) position.
  func funpack64(f uint64) (sign, mant uint64, exp int, inf, nan bool) {
         sign = f & (1 << (mantbits64 + expbits64))
         mant = f & (1<<mantbits64 - 1)
@@ -371,24 +376,25 @@ func fcmp64(f, g uint64) (cmp int32, isnan bool) {
         return 0, false
  }
  
-func f64toint(f uint64) (val int64, ok bool) {
+// returns saturated-conversion int64 value of f. Despite its name,
+// isNan is false when f is NaN (val may then not match the "hardware"
+// conversion) and also when |f| < 0.5.
+func f64toint(f uint64) (val int64, isNan bool) {
         fs, fm, fe, fi, fn := funpack64(f)
  
         switch {
-       case fi, fn: // NaN
-               return 0, false
+
+       case fn: // NaN
+               return -0x8000_0000_0000_0000, false
  
         case fe < -1: // f < 0.5
                 return 0, false
  
-       case fe > 63: // f >= 2^63
-               if fs != 0 && fm == 0 { // f == -2^63
-                       return -1 << 63, true
-               }
+       case fi || fe >= 63: // |f| >= 2^63, including infinity
                 if fs != 0 {
-                       return 0, false
+                       return -0x8000_0000_0000_0000, true
                 }
-               return 0, false
+               return 0x7fff_ffff_ffff_ffff, true
         }
  
         for fe > int(mantbits64) {
@@ -400,12 +406,51 @@ func f64toint(f uint64) (val int64, ok bool) {
                 fm >>= 1
         }
         val = int64(fm)
+       if val < 0 {
+               if fs != 0 {
+                       return -0x8000_0000_0000_0000, true
+               }
+               return 0x7fff_ffff_ffff_ffff, true
+       }
         if fs != 0 {
                 val = -val
         }
         return val, true
  }
  
+// returns saturated-conversion uint64 value of f. Despite its name,
+// isNan is false when f is NaN (val may then not match the "hardware"
+// conversion) and true for every other input.
+func f64touint(f uint64) (val uint64, isNan bool) {
+       fs, fm, fe, fi, fn := funpack64(f)
+
+       switch {
+
+       case fn: // NaN
+               return 0xffff_ffff_ffff_ffff, false
+
+       case fs != 0: // all negative, including -Inf, are zero
+               return 0, true
+
+       case fi || fe >= 64: // positive infinity or f >= 2^64
+               return 0xffff_ffff_ffff_ffff, true
+
+       case fe < -1: // f < 0.5
+               return 0, true
+       }
+
+       for fe > int(mantbits64) {
+               fe--
+               fm <<= 1
+       }
+       for fe < int(mantbits64) {
+               fe++
+               fm >>= 1
+       }
+       val = fm
+       return val, true
+}
+
  func fintto64(val int64) (f uint64) {
         fs := uint64(val) & (1 << 63)
         mant := uint64(val)
@@ -564,6 +609,12 @@ func fint64to64(x int64) uint64 {
  
  func f32toint32(x uint32) int32 {
         val, _ := f64toint(f32to64(x))
+       if val >= 0x7fffffff {
+               return 0x7fffffff
+       }
+       if val < -0x80000000 {
+               return -0x80000000
+       }
         return int32(val)
  }
  
@@ -574,6 +625,12 @@ func f32toint64(x uint32) int64 {
  
  func f64toint32(x uint64) int32 {
         val, _ := f64toint(x)
+       if val >= 0x7fffffff {
+               return 0x7fffffff
+       }
+       if val < -0x80000000 {
+               return -0x80000000
+       }
         return int32(val)
  }
  
@@ -583,23 +640,13 @@ func f64toint64(x uint64) int64 {
  }
  
  func f64touint64(x uint64) uint64 {
-       var m uint64 = 0x43e0000000000000 // float64 1<<63
-       if fgt64(m, x) {
-               return uint64(f64toint64(x))
-       }
-       y := fadd64(x, -m)
-       z := uint64(f64toint64(y))
-       return z | (1 << 63)
+       val, _ := f64touint(x)
+       return val
  }
  
  func f32touint64(x uint32) uint64 {
-       var m uint32 = 0x5f000000 // float32 1<<63
-       if fgt32(m, x) {
-               return uint64(f32toint64(x))
-       }
-       y := fadd32(x, -m)
-       z := uint64(f32toint64(y))
-       return z | (1 << 63)
+       val, _ := f64touint(f32to64(x))
+       return val
  }
  
  func fuint64to64(x uint64) uint64 {
diff --git a/src/runtime/softfloat64_test.go b/src/runtime/softfloat64_test.go

index 3f53e8bc55810cfb9cbd67961cbd958d9a7b2067..233d5e01c0ea60b2292c888baa7614342c588917 100644 (file)
--- a/src/runtime/softfloat64_test.go
+++ b/src/runtime/softfloat64_test.go
@@ -28,6 +28,15 @@ func div(x, y float64) float64 { return x / y }
  func TestFloat64(t *testing.T) {
         base := []float64{
                 0,
+               1,
+               -9223372036854775808,
+               -9223372036854775808 + 4096,
+               18446744073709551615,
+               18446744073709551615 + 1,
+               18446744073709551615 - 1,
+               9223372036854775808 + 4096,
+               0.5,
+               0.75,
                 math.Copysign(0, -1),
                 -1,
                 1,
@@ -35,6 +44,8 @@ func TestFloat64(t *testing.T) {
                 math.Inf(+1),
                 math.Inf(-1),
                 0.1,
+               0.5,
+               0.75,
                 1.5,
                 1.9999999999999998,     // all 1s mantissa
                 1.3333333333333333,     // 1.010101010101...
@@ -70,7 +81,7 @@ func TestFloat64(t *testing.T) {
                 1e+307,
                 1e+308,
         }
-       all := make([]float64, 200)
+       all := make([]float64, 250)
         copy(all, base)
         for i := len(base); i < len(all); i++ {
                 all[i] = rand.NormFloat64()
@@ -82,6 +93,7 @@ func TestFloat64(t *testing.T) {
                 test(t, "*", mul, fop(Fmul64), all)
                 test(t, "/", div, fop(Fdiv64), all)
         }
+
  }
  
  // 64 -hw-> 32 -hw-> 64
@@ -104,6 +116,11 @@ func hwint64(f float64) float64 {
         return float64(int64(f))
  }
  
+// float64 -hw-> uint64 -hw-> float64
+func hwuint64(f float64) float64 {
+       return float64(uint64(f))
+}
+
  // float64 -hw-> int32 -hw-> float64
  func hwint32(f float64) float64 {
         return float64(int32(f))
@@ -113,13 +130,23 @@ func hwint32(f float64) float64 {
  func toint64sw(f float64) float64 {
         i, ok := F64toint(math.Float64bits(f))
         if !ok {
-               // There's no right answer for out of range.
+               // There's no right answer for NaN.
                 // Match the hardware to pass the test.
                 i = int64(f)
         }
         return float64(i)
  }
  
+func touint64sw(f float64) float64 {
+       i := F64touint(math.Float64bits(f))
+       if f != f {
+               // There's no right answer for NaN.
+               // Match the hardware to pass the test.
+               i = uint64(f)
+       }
+       return float64(i)
+}
+
  // float64 -hw-> int64 -sw-> float64
  func fromint64sw(f float64) float64 {
         return math.Float64frombits(Fintto64(int64(f)))
@@ -150,6 +177,7 @@ func test(t *testing.T, op string, hw, sw func(float64, float64) float64, all []
                         testu(t, "to32", trunc32, to32sw, h)
                         testu(t, "to64", trunc32, to64sw, h)
                         testu(t, "toint64", hwint64, toint64sw, h)
+                       testu(t, "touint64", hwuint64, touint64sw, h)
                         testu(t, "fromint64", hwint64, fromint64sw, h)
                         testcmp(t, f, h)
                         testcmp(t, h, f)
@@ -163,6 +191,7 @@ func testu(t *testing.T, op string, hw, sw func(float64) float64, v float64) {
         h := hw(v)
         s := sw(v)
         if !same(h, s) {
+               s = sw(v) // debug me
                 err(t, "%s %g = sw %g, hw %g\n", op, v, s, h)
         }
  }
diff --git a/test/convert5.go b/test/convert5.go

index 57585ef76e16731cfda60723a42eea016a29832a..27aa7867f4282456899b5a8ae6048ad4dfb53240 100644 (file)
--- a/test/convert5.go
+++ b/test/convert5.go
@@ -62,6 +62,8 @@ func main() {
         p64_plus4k_plus1 := id(float64(p64 + 4096 + 1)) // want this to be precise and fit in 53 bits mantissa
         n32_minus4k := id(float32(n32 - 4096))
         n64_minus4k := id(float64(n64 - 4096))
+       n32_plus4k := id(float32(n32 + 4096))
+       n64_plus4k := id(float64(n64 + 4096))
         inf_32 := id(float32(one / 0))
         inf_64 := id(float64(one / 0))
         ninf_32 := id(float32(-one / 0))
@@ -79,6 +81,7 @@ func main() {
                 {"p64_plus4k_plus1", p64_plus4k_plus1, p32},
                 {"n32_minus4k", n32_minus4k, n32},
                 {"n64_minus4k", n64_minus4k, n32},
+               {"n32_plus4k", n32_plus4k, n32 + 4096},
                 {"inf_32", inf_32, p32},
                 {"inf_64", inf_64, p32},
                 {"ninf_32", ninf_32, n32},
@@ -108,6 +111,8 @@ func main() {
                 {"p64_plus4k_plus1", p64_plus4k_plus1, p64},
                 {"n32_minus4k", n32_minus4k, n32 - 4096},
                 {"n64_minus4k", n64_minus4k, n64},
+               {"n32_plus4k", n32_plus4k, n32 + 4096},
+               {"n64_plus4k", n64_plus4k, n64 + 4096},
                 {"inf_32", inf_32, p64},
                 {"inf_64", inf_64, p64},
                 {"ninf_32", ninf_32, n64},
author	David Chase <drchase@google.com>
	Mon, 6 Oct 2025 19:01:03 +0000 (15:01 -0400)
committer	David Chase <drchase@google.com>
	Thu, 9 Oct 2025 15:23:39 +0000 (08:23 -0700)
src/runtime/export_test.go		patch \| blob \| history
src/runtime/softfloat64.go		patch \| blob \| history
src/runtime/softfloat64_test.go		patch \| blob \| history
test/convert5.go		patch \| blob \| history