Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: modify float-to-[u]int so that amd64 and arm64 match
author: David Chase <drchase@google.com>
Mon, 21 Jul 2025 17:30:08 +0000 (13:30 -0400)
committer: David Chase <drchase@google.com>
Thu, 9 Oct 2025 15:23:21 +0000 (08:23 -0700)
Eventual goal is that all the architectures agree, and are
sensible.  The test will be build-tagged to exclude
not-yet-handled platforms.

This change also enables bisecting the conversion change in case of bugs.
(`bisect -compile=convert ...`)

Change-Id: I98528666b0a3fde17cbe8d69b612d01da18dce85
Reviewed-on: https://go-review.googlesource.com/c/go/+/691135
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
src/cmd/compile/internal/base/debug.go
src/cmd/compile/internal/base/flag.go
src/cmd/compile/internal/base/hashdebug.go
src/cmd/compile/internal/ssa/_gen/AMD64.rules
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssagen/ssa.go
test/convert5.go [new file with mode: 0644]
test/convert5.out [new file with mode: 0644]

index 85873dcc40e1b31279c9c8b20cb4df3efcb75130..9e8ab2f488bb4d13ea63067947bf00a6c1532fce 100644 (file)
@@ -20,6 +20,7 @@ type DebugFlags struct {
        Append                int    `help:"print information about append compilation"`
        Checkptr              int    `help:"instrument unsafe pointer conversions\n0: instrumentation disabled\n1: conversions involving unsafe.Pointer are instrumented\n2: conversions to unsafe.Pointer force heap allocation" concurrent:"ok"`
        Closure               int    `help:"print information about closure compilation"`
+       Converthash           string `help:"hash value for use in debugging changes to platform-dependent float-to-[u]int conversion" concurrent:"ok"`
        Defer                 int    `help:"print information about defer compilation"`
        DisableNil            int    `help:"disable nil checks" concurrent:"ok"`
        DumpInlFuncProps      string `help:"dump function properties from inl heuristics to specified file"`
index a0ed876cfc8e0e1d1265d5659b12612ae440e1a3..1ac2cecc61ec7ec6b3b5e3bc9a7737d458e5ed16 100644 (file)
@@ -262,6 +262,9 @@ func ParseFlags() {
                Debug.LoopVar = 1
        }
 
+       if Debug.Converthash != "" {
+               ConvertHash = NewHashDebug("converthash", Debug.Converthash, nil)
+       }
        if Debug.Fmahash != "" {
                FmaHash = NewHashDebug("fmahash", Debug.Fmahash, nil)
        }
index fa63deb46a3c019bde47b683a4d7b6e3ef882db6..edf567457cb04bd39979288438770eae58a0ba75 100644 (file)
@@ -53,6 +53,7 @@ func (d *HashDebug) SetInlineSuffixOnly(b bool) *HashDebug {
 // The default compiler-debugging HashDebug, for "-d=gossahash=..."
 var hashDebug *HashDebug
 
+var ConvertHash *HashDebug      // for debugging float-to-[u]int conversion changes
 var FmaHash *HashDebug          // for debugging fused-multiply-add floating point changes
 var LoopVarHash *HashDebug      // for debugging shared/private loop variable changes
 var PGOHash *HashDebug          // for debugging PGO optimization decisions
index 7d3efef5cdc837981e6e1efa3003d8625571ad23..0bea99e38de1bcb4fcd47210b838db7880bed87e 100644 (file)
 (Cvt64to32F ...) => (CVTSQ2SS ...)
 (Cvt64to64F ...) => (CVTSQ2SD ...)
 
-(Cvt32Fto32 ...) => (CVTTSS2SL ...)
-(Cvt32Fto64 ...) => (CVTTSS2SQ ...)
-(Cvt64Fto32 ...) => (CVTTSD2SL ...)
-(Cvt64Fto64 ...) => (CVTTSD2SQ ...)
+// Float, to int.
+// To make AMD64 "overflow" return max positive instead of max negative, compute
+// y and not x, smear the sign bit, and xor.
+(Cvt32Fto32 <t> x) && base.ConvertHash.MatchPos(v.Pos, nil) => (XORL <t> y (SARLconst <t> [31] (ANDL <t> y:(CVTTSS2SL <t> x) (NOTL <typ.Int32> (MOVLf2i x)))))
+(Cvt64Fto32 <t> x) && base.ConvertHash.MatchPos(v.Pos, nil) => (XORL <t> y (SARLconst <t> [31] (ANDL <t> y:(CVTTSD2SL <t> x) (NOTL <typ.Int32> (MOVLf2i (CVTSD2SS <typ.Float32> x))))))
+
+(Cvt32Fto64 <t> x) && base.ConvertHash.MatchPos(v.Pos, nil) => (XORQ <t> y (SARQconst <t> [63] (ANDQ <t> y:(CVTTSS2SQ <t> x) (NOTQ <typ.Int64> (MOVQf2i (CVTSS2SD <typ.Float64> x))) )))
+(Cvt64Fto64 <t> x) && base.ConvertHash.MatchPos(v.Pos, nil) => (XORQ <t> y (SARQconst <t> [63] (ANDQ <t> y:(CVTTSD2SQ <t> x) (NOTQ <typ.Int64> (MOVQf2i x)))))
+
+(Cvt32Fto32 <t> x) && !base.ConvertHash.MatchPos(v.Pos, nil) => (CVTTSS2SL <t> x)
+(Cvt32Fto64 <t> x) && !base.ConvertHash.MatchPos(v.Pos, nil) => (CVTTSS2SQ <t> x)
+(Cvt64Fto32 <t> x) && !base.ConvertHash.MatchPos(v.Pos, nil) => (CVTTSD2SL <t> x)
+(Cvt64Fto64 <t> x) && !base.ConvertHash.MatchPos(v.Pos, nil) => (CVTTSD2SQ <t> x)
 
 (Cvt32Fto64F ...) => (CVTSS2SD ...)
 (Cvt64Fto32F ...) => (CVTSD2SS ...)
index a7ee632ae1af7241b48d24aeb5f6981f3304dc4b..e702925f5f3bac37ab8c3bc30e3854fa6e2c482e 100644 (file)
@@ -5,6 +5,7 @@ package ssa
 import "internal/buildcfg"
 import "math"
 import "cmd/internal/obj"
+import "cmd/compile/internal/base"
 import "cmd/compile/internal/types"
 
 func rewriteValueAMD64(v *Value) bool {
@@ -694,11 +695,9 @@ func rewriteValueAMD64(v *Value) bool {
        case OpCtz8NonZero:
                return rewriteValueAMD64_OpCtz8NonZero(v)
        case OpCvt32Fto32:
-               v.Op = OpAMD64CVTTSS2SL
-               return true
+               return rewriteValueAMD64_OpCvt32Fto32(v)
        case OpCvt32Fto64:
-               v.Op = OpAMD64CVTTSS2SQ
-               return true
+               return rewriteValueAMD64_OpCvt32Fto64(v)
        case OpCvt32Fto64F:
                v.Op = OpAMD64CVTSS2SD
                return true
@@ -709,14 +708,12 @@ func rewriteValueAMD64(v *Value) bool {
                v.Op = OpAMD64CVTSL2SD
                return true
        case OpCvt64Fto32:
-               v.Op = OpAMD64CVTTSD2SL
-               return true
+               return rewriteValueAMD64_OpCvt64Fto32(v)
        case OpCvt64Fto32F:
                v.Op = OpAMD64CVTSD2SS
                return true
        case OpCvt64Fto64:
-               v.Op = OpAMD64CVTTSD2SQ
-               return true
+               return rewriteValueAMD64_OpCvt64Fto64(v)
        case OpCvt64to32F:
                v.Op = OpAMD64CVTSQ2SS
                return true
@@ -25511,6 +25508,190 @@ func rewriteValueAMD64_OpCtz8NonZero(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpCvt32Fto32(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Cvt32Fto32 <t> x)
+       // cond: base.ConvertHash.MatchPos(v.Pos, nil)
+       // result: (XORL <t> y (SARLconst <t> [31] (ANDL <t> y:(CVTTSS2SL <t> x) (NOTL <typ.Int32> (MOVLf2i x)))))
+       for {
+               t := v.Type
+               x := v_0
+               if !(base.ConvertHash.MatchPos(v.Pos, nil)) {
+                       break
+               }
+               v.reset(OpAMD64XORL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64SARLconst, t)
+               v0.AuxInt = int8ToAuxInt(31)
+               v1 := b.NewValue0(v.Pos, OpAMD64ANDL, t)
+               y := b.NewValue0(v.Pos, OpAMD64CVTTSS2SL, t)
+               y.AddArg(x)
+               v3 := b.NewValue0(v.Pos, OpAMD64NOTL, typ.Int32)
+               v4 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32)
+               v4.AddArg(x)
+               v3.AddArg(v4)
+               v1.AddArg2(y, v3)
+               v0.AddArg(v1)
+               v.AddArg2(y, v0)
+               return true
+       }
+       // match: (Cvt32Fto32 <t> x)
+       // cond: !base.ConvertHash.MatchPos(v.Pos, nil)
+       // result: (CVTTSS2SL <t> x)
+       for {
+               t := v.Type
+               x := v_0
+               if !(!base.ConvertHash.MatchPos(v.Pos, nil)) {
+                       break
+               }
+               v.reset(OpAMD64CVTTSS2SL)
+               v.Type = t
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpCvt32Fto64(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Cvt32Fto64 <t> x)
+       // cond: base.ConvertHash.MatchPos(v.Pos, nil)
+       // result: (XORQ <t> y (SARQconst <t> [63] (ANDQ <t> y:(CVTTSS2SQ <t> x) (NOTQ <typ.Int64> (MOVQf2i (CVTSS2SD <typ.Float64> x))) )))
+       for {
+               t := v.Type
+               x := v_0
+               if !(base.ConvertHash.MatchPos(v.Pos, nil)) {
+                       break
+               }
+               v.reset(OpAMD64XORQ)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64SARQconst, t)
+               v0.AuxInt = int8ToAuxInt(63)
+               v1 := b.NewValue0(v.Pos, OpAMD64ANDQ, t)
+               y := b.NewValue0(v.Pos, OpAMD64CVTTSS2SQ, t)
+               y.AddArg(x)
+               v3 := b.NewValue0(v.Pos, OpAMD64NOTQ, typ.Int64)
+               v4 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64)
+               v5 := b.NewValue0(v.Pos, OpAMD64CVTSS2SD, typ.Float64)
+               v5.AddArg(x)
+               v4.AddArg(v5)
+               v3.AddArg(v4)
+               v1.AddArg2(y, v3)
+               v0.AddArg(v1)
+               v.AddArg2(y, v0)
+               return true
+       }
+       // match: (Cvt32Fto64 <t> x)
+       // cond: !base.ConvertHash.MatchPos(v.Pos, nil)
+       // result: (CVTTSS2SQ <t> x)
+       for {
+               t := v.Type
+               x := v_0
+               if !(!base.ConvertHash.MatchPos(v.Pos, nil)) {
+                       break
+               }
+               v.reset(OpAMD64CVTTSS2SQ)
+               v.Type = t
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpCvt64Fto32(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Cvt64Fto32 <t> x)
+       // cond: base.ConvertHash.MatchPos(v.Pos, nil)
+       // result: (XORL <t> y (SARLconst <t> [31] (ANDL <t> y:(CVTTSD2SL <t> x) (NOTL <typ.Int32> (MOVLf2i (CVTSD2SS <typ.Float32> x))))))
+       for {
+               t := v.Type
+               x := v_0
+               if !(base.ConvertHash.MatchPos(v.Pos, nil)) {
+                       break
+               }
+               v.reset(OpAMD64XORL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64SARLconst, t)
+               v0.AuxInt = int8ToAuxInt(31)
+               v1 := b.NewValue0(v.Pos, OpAMD64ANDL, t)
+               y := b.NewValue0(v.Pos, OpAMD64CVTTSD2SL, t)
+               y.AddArg(x)
+               v3 := b.NewValue0(v.Pos, OpAMD64NOTL, typ.Int32)
+               v4 := b.NewValue0(v.Pos, OpAMD64MOVLf2i, typ.UInt32)
+               v5 := b.NewValue0(v.Pos, OpAMD64CVTSD2SS, typ.Float32)
+               v5.AddArg(x)
+               v4.AddArg(v5)
+               v3.AddArg(v4)
+               v1.AddArg2(y, v3)
+               v0.AddArg(v1)
+               v.AddArg2(y, v0)
+               return true
+       }
+       // match: (Cvt64Fto32 <t> x)
+       // cond: !base.ConvertHash.MatchPos(v.Pos, nil)
+       // result: (CVTTSD2SL <t> x)
+       for {
+               t := v.Type
+               x := v_0
+               if !(!base.ConvertHash.MatchPos(v.Pos, nil)) {
+                       break
+               }
+               v.reset(OpAMD64CVTTSD2SL)
+               v.Type = t
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpCvt64Fto64(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Cvt64Fto64 <t> x)
+       // cond: base.ConvertHash.MatchPos(v.Pos, nil)
+       // result: (XORQ <t> y (SARQconst <t> [63] (ANDQ <t> y:(CVTTSD2SQ <t> x) (NOTQ <typ.Int64> (MOVQf2i x)))))
+       for {
+               t := v.Type
+               x := v_0
+               if !(base.ConvertHash.MatchPos(v.Pos, nil)) {
+                       break
+               }
+               v.reset(OpAMD64XORQ)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64SARQconst, t)
+               v0.AuxInt = int8ToAuxInt(63)
+               v1 := b.NewValue0(v.Pos, OpAMD64ANDQ, t)
+               y := b.NewValue0(v.Pos, OpAMD64CVTTSD2SQ, t)
+               y.AddArg(x)
+               v3 := b.NewValue0(v.Pos, OpAMD64NOTQ, typ.Int64)
+               v4 := b.NewValue0(v.Pos, OpAMD64MOVQf2i, typ.UInt64)
+               v4.AddArg(x)
+               v3.AddArg(v4)
+               v1.AddArg2(y, v3)
+               v0.AddArg(v1)
+               v.AddArg2(y, v0)
+               return true
+       }
+       // match: (Cvt64Fto64 <t> x)
+       // cond: !base.ConvertHash.MatchPos(v.Pos, nil)
+       // result: (CVTTSD2SQ <t> x)
+       for {
+               t := v.Type
+               x := v_0
+               if !(!base.ConvertHash.MatchPos(v.Pos, nil)) {
+                       break
+               }
+               v.reset(OpAMD64CVTTSD2SQ)
+               v.Type = t
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpDiv16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
index 6c4afb78959791ddeb4a77eaafa943735351e104..2705195bd557910c31d84db994ba04aed8f23c97 100644 (file)
@@ -2574,13 +2574,13 @@ var fpConvOpToSSA = map[twoTypes]twoOpsAndType{
 
        {types.TFLOAT32, types.TUINT8}:  {ssa.OpCvt32Fto32, ssa.OpTrunc32to8, types.TINT32},
        {types.TFLOAT32, types.TUINT16}: {ssa.OpCvt32Fto32, ssa.OpTrunc32to16, types.TINT32},
-       {types.TFLOAT32, types.TUINT32}: {ssa.OpCvt32Fto64, ssa.OpTrunc64to32, types.TINT64}, // go wide to dodge unsigned
-       {types.TFLOAT32, types.TUINT64}: {ssa.OpInvalid, ssa.OpCopy, types.TUINT64},          // Cvt32Fto64U, branchy code expansion instead
+       {types.TFLOAT32, types.TUINT32}: {ssa.OpInvalid, ssa.OpCopy, types.TINT64},  // Cvt64Fto32U, branchy code expansion instead
+       {types.TFLOAT32, types.TUINT64}: {ssa.OpInvalid, ssa.OpCopy, types.TUINT64}, // Cvt32Fto64U, branchy code expansion instead
 
        {types.TFLOAT64, types.TUINT8}:  {ssa.OpCvt64Fto32, ssa.OpTrunc32to8, types.TINT32},
        {types.TFLOAT64, types.TUINT16}: {ssa.OpCvt64Fto32, ssa.OpTrunc32to16, types.TINT32},
-       {types.TFLOAT64, types.TUINT32}: {ssa.OpCvt64Fto64, ssa.OpTrunc64to32, types.TINT64}, // go wide to dodge unsigned
-       {types.TFLOAT64, types.TUINT64}: {ssa.OpInvalid, ssa.OpCopy, types.TUINT64},          // Cvt64Fto64U, branchy code expansion instead
+       {types.TFLOAT64, types.TUINT32}: {ssa.OpInvalid, ssa.OpCopy, types.TINT64},  // Cvt64Fto32U, branchy code expansion instead
+       {types.TFLOAT64, types.TUINT64}: {ssa.OpInvalid, ssa.OpCopy, types.TUINT64}, // Cvt64Fto64U, branchy code expansion instead
 
        // float
        {types.TFLOAT64, types.TFLOAT32}: {ssa.OpCvt64Fto32F, ssa.OpCopy, types.TFLOAT32},
@@ -2860,10 +2860,23 @@ func (s *state) conv(n ir.Node, v *ssa.Value, ft, tt *types.Type) *ssa.Value {
                }
                // ft is float32 or float64, and tt is unsigned integer
                if ft.Size() == 4 {
-                       return s.float32ToUint64(n, v, ft, tt)
+                       switch tt.Size() {
+                       case 8:
+                               return s.float32ToUint64(n, v, ft, tt)
+                       case 4, 2, 1:
+                               // TODO should 2 and 1 saturate or truncate?
+                               return s.float32ToUint32(n, v, ft, tt)
+                       }
                }
                if ft.Size() == 8 {
-                       return s.float64ToUint64(n, v, ft, tt)
+                       switch tt.Size() {
+                       case 8:
+                               return s.float64ToUint64(n, v, ft, tt)
+                       case 4, 2, 1:
+                               // TODO should 2 and 1 saturate or truncate?
+                               return s.float64ToUint32(n, v, ft, tt)
+                       }
+
                }
                s.Fatalf("weird float to unsigned integer conversion %v -> %v", ft, tt)
                return nil
@@ -5553,7 +5566,9 @@ func (s *state) uint64Tofloat(cvttab *u642fcvtTab, n ir.Node, x *ssa.Value, ft,
        // equal to 10000000001; that rounds up, and the 1 cannot
        // be lost else it would round down if the LSB of the
        // candidate mantissa is 0.
+
        cmp := s.newValue2(cvttab.leq, types.Types[types.TBOOL], s.zeroVal(ft), x)
+
        b := s.endBlock()
        b.Kind = ssa.BlockIf
        b.SetControl(cmp)
@@ -5779,34 +5794,61 @@ func (s *state) float64ToUint32(n ir.Node, x *ssa.Value, ft, tt *types.Type) *ss
 func (s *state) floatToUint(cvttab *f2uCvtTab, n ir.Node, x *ssa.Value, ft, tt *types.Type) *ssa.Value {
        // cutoff:=1<<(intY_Size-1)
        // if x < floatX(cutoff) {
-       //      result = uintY(x)
+       //      result = uintY(x) // bThen
+       //      if x < 0 { // unlikely
+       //              result = 0 // bZero
+       //      }
        // } else {
-       //      y = x - floatX(cutoff)
+       //      y = x - floatX(cutoff) // bElse
        //      z = uintY(y)
        //      result = z | -(cutoff)
        // }
+
        cutoff := cvttab.floatValue(s, ft, float64(cvttab.cutoff))
-       cmp := s.newValue2(cvttab.ltf, types.Types[types.TBOOL], x, cutoff)
+       cmp := s.newValueOrSfCall2(cvttab.ltf, types.Types[types.TBOOL], x, cutoff)
        b := s.endBlock()
        b.Kind = ssa.BlockIf
        b.SetControl(cmp)
        b.Likely = ssa.BranchLikely
 
-       bThen := s.f.NewBlock(ssa.BlockPlain)
+       var bThen, bZero *ssa.Block
+       newConversion := base.ConvertHash.MatchPos(n.Pos(), nil)
+       if newConversion {
+               bZero = s.f.NewBlock(ssa.BlockPlain)
+               bThen = s.f.NewBlock(ssa.BlockIf)
+       } else {
+               bThen = s.f.NewBlock(ssa.BlockPlain)
+       }
+
        bElse := s.f.NewBlock(ssa.BlockPlain)
        bAfter := s.f.NewBlock(ssa.BlockPlain)
 
        b.AddEdgeTo(bThen)
        s.startBlock(bThen)
-       a0 := s.newValue1(cvttab.cvt2U, tt, x)
+       a0 := s.newValueOrSfCall1(cvttab.cvt2U, tt, x)
        s.vars[n] = a0
-       s.endBlock()
-       bThen.AddEdgeTo(bAfter)
+
+       if newConversion {
+               cmpz := s.newValueOrSfCall2(cvttab.ltf, types.Types[types.TBOOL], x, cvttab.floatValue(s, ft, 0.0))
+               s.endBlock()
+               bThen.SetControl(cmpz)
+               bThen.AddEdgeTo(bZero)
+               bThen.Likely = ssa.BranchUnlikely
+               bThen.AddEdgeTo(bAfter)
+
+               s.startBlock(bZero)
+               s.vars[n] = cvttab.intValue(s, tt, 0)
+               s.endBlock()
+               bZero.AddEdgeTo(bAfter)
+       } else {
+               s.endBlock()
+               bThen.AddEdgeTo(bAfter)
+       }
 
        b.AddEdgeTo(bElse)
        s.startBlock(bElse)
-       y := s.newValue2(cvttab.subf, ft, x, cutoff)
-       y = s.newValue1(cvttab.cvt2U, tt, y)
+       y := s.newValueOrSfCall2(cvttab.subf, ft, x, cutoff)
+       y = s.newValueOrSfCall1(cvttab.cvt2U, tt, y)
        z := cvttab.intValue(s, tt, int64(-cvttab.cutoff))
        a1 := s.newValue2(cvttab.or, tt, y, z)
        s.vars[n] = a1
diff --git a/test/convert5.go b/test/convert5.go
new file mode 100644 (file)
index 0000000..1bd74ab
--- /dev/null
@@ -0,0 +1,268 @@
+// run
+
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !wasm && !386 && !arm && !mips
+
+// TODO fix this to work for wasm and 32-bit architectures.
+// Doing more than this, however, expands the change.
+
+package main
+
+import (
+       "fmt"
+       "runtime"
+)
+
+// This test checks that conversion from floats to (unsigned) 32 and 64-bit
+// integers has the same sensible behavior for corner cases, and that the
+// conversions to smaller integers agree.  Because outliers are platform-
+// independent, the "golden test" for smaller integers is more of
+// a "gold-ish test" and subject to change.
+
+//go:noinline
+func id[T any](x T) T {
+       return x
+}
+
+//go:noinline
+func want[T comparable](name string, x, y T) {
+       if x != y {
+               _, _, line, _ := runtime.Caller(1)
+               fmt.Println("FAIL at line", line, "var =", name, "got =", x, "want =", y)
+       }
+}
+
+//go:noinline
+func log[T comparable](name string, x T) {
+       fmt.Println(name, x)
+}
+
+const (
+       // pX = max positive signed X bit
+       // nX = min negative signed X bit
+       // uX = max unsigned X bit
+       // tX = two to the X
+       p32 = 2147483647
+       n32 = -2147483648
+       u32 = 4294967295
+       p64 = 9223372036854775807
+       n64 = -9223372036854775808
+       u64 = 18446744073709551615
+       t44 = 1 << 44
+)
+
+func main() {
+       one := 1.0
+       minus1_32 := id(float32(-1.0))
+       minus1_64 := id(float64(-1.0))
+       p32_plus4k_plus1 := id(float32(p32 + 4096 + 1)) // want this to be precise and fit in 24 bits mantissa
+       p64_plus4k_plus1 := id(float64(p64 + 4096 + 1)) // want this to be precise and fit in 53 bits mantissa
+       n32_minus4k := id(float32(n32 - 4096))
+       n64_minus4k := id(float64(n64 - 4096))
+       inf_32 := id(float32(one / 0))
+       inf_64 := id(float64(one / 0))
+       ninf_32 := id(float32(-one / 0))
+       ninf_64 := id(float64(-one / 0))
+
+       // int32 conversions
+       int32Tests := []struct {
+               name     string
+               input    any // Use any to handle both float32 and float64
+               expected int32
+       }{
+               {"minus1_32", minus1_32, -1},
+               {"minus1_64", minus1_64, -1},
+               {"p32_plus4k_plus1", p32_plus4k_plus1, p32},
+               {"p64_plus4k_plus1", p64_plus4k_plus1, p32},
+               {"n32_minus4k", n32_minus4k, n32},
+               {"n64_minus4k", n64_minus4k, n32},
+               {"inf_32", inf_32, p32},
+               {"inf_64", inf_64, p32},
+               {"ninf_32", ninf_32, n32},
+               {"ninf_64", ninf_64, n32},
+       }
+
+       for _, test := range int32Tests {
+               var converted int32
+               switch v := test.input.(type) {
+               case float32:
+                       converted = int32(v)
+               case float64:
+                       converted = int32(v)
+               }
+               want(test.name, converted, test.expected)
+       }
+
+       // int64 conversions
+       int64Tests := []struct {
+               name     string
+               input    any
+               expected int64
+       }{
+               {"minus1_32", minus1_32, -1},
+               {"minus1_64", minus1_64, -1},
+               {"p32_plus4k_plus1", p32_plus4k_plus1, p32 + 4096 + 1},
+               {"p64_plus4k_plus1", p64_plus4k_plus1, p64},
+               {"n32_minus4k", n32_minus4k, n32 - 4096},
+               {"n64_minus4k", n64_minus4k, n64},
+               {"inf_32", inf_32, p64},
+               {"inf_64", inf_64, p64},
+               {"ninf_32", ninf_32, n64},
+               {"ninf_64", ninf_64, n64},
+       }
+
+       for _, test := range int64Tests {
+               var converted int64
+               switch v := test.input.(type) {
+               case float32:
+                       converted = int64(v)
+               case float64:
+                       converted = int64(v)
+               }
+               want(test.name, converted, test.expected)
+       }
+
+       // uint32 conversions
+       uint32Tests := []struct {
+               name     string
+               input    any
+               expected uint32
+       }{
+               {"minus1_32", minus1_32, 0},
+               {"minus1_64", minus1_64, 0},
+               {"p32_plus4k_plus1", p32_plus4k_plus1, p32 + 4096 + 1},
+               {"p64_plus4k_plus1", p64_plus4k_plus1, u32},
+               {"n32_minus4k", n32_minus4k, 0},
+               {"n64_minus4k", n64_minus4k, 0},
+               {"inf_32", inf_32, u32},
+               {"inf_64", inf_64, u32},
+               {"ninf_32", ninf_32, 0},
+               {"ninf_64", ninf_64, 0},
+       }
+
+       for _, test := range uint32Tests {
+               var converted uint32
+               switch v := test.input.(type) {
+               case float32:
+                       converted = uint32(v)
+               case float64:
+                       converted = uint32(v)
+               }
+               want(test.name, converted, test.expected)
+       }
+
+       u64_plus4k_plus1_64 := id(float64(u64 + 4096 + 1))
+       u64_plust44_plus1_32 := id(float32(u64 + t44 + 1))
+
+       // uint64 conversions
+       uint64Tests := []struct {
+               name     string
+               input    any
+               expected uint64
+       }{
+               {"minus1_32", minus1_32, 0},
+               {"minus1_64", minus1_64, 0},
+               {"p32_plus4k_plus1", p32_plus4k_plus1, p32 + 4096 + 1},
+               {"p64_plus4k_plus1", p64_plus4k_plus1, p64 + 4096 + 1},
+               {"n32_minus4k", n32_minus4k, 0},
+               {"n64_minus4k", n64_minus4k, 0},
+               {"inf_32", inf_32, u64},
+               {"inf_64", inf_64, u64},
+               {"ninf_32", ninf_32, 0},
+               {"ninf_64", ninf_64, 0},
+               {"u64_plus4k_plus1_64", u64_plus4k_plus1_64, u64},
+               {"u64_plust44_plus1_32", u64_plust44_plus1_32, u64},
+       }
+
+       for _, test := range uint64Tests {
+               var converted uint64
+               switch v := test.input.(type) {
+               case float32:
+                       converted = uint64(v)
+               case float64:
+                       converted = uint64(v)
+               }
+               want(test.name, converted, test.expected)
+       }
+
+       // for smaller integer types
+       // TODO the overflow behavior is dubious, maybe we should fix it to be more sensible, e.g. saturating.
+       fmt.Println("Below this are 'golden' results to check for consistency across platforms.  Overflow behavior is not necessarily what we want")
+
+       u8plus2 := id(float64(257))
+       p8minus1 := id(float32(126))
+       n8plus2 := id(float64(-126))
+       n8minusone := id(float32(-129))
+
+       fmt.Println("\nuint8 conversions")
+       uint8Tests := []struct {
+               name  string
+               input any
+       }{
+               {"minus1_32", minus1_32},
+               {"minus1_64", minus1_64},
+               {"p32_plus4k_plus1", p32_plus4k_plus1},
+               {"p64_plus4k_plus1", p64_plus4k_plus1},
+               {"n32_minus4k", n32_minus4k},
+               {"n64_minus4k", n64_minus4k},
+               {"inf_32", inf_32},
+               {"inf_64", inf_64},
+               {"ninf_32", ninf_32},
+               {"ninf_64", ninf_64},
+               {"u64_plus4k_plus1_64", u64_plus4k_plus1_64},
+               {"u64_plust44_plus1_32", u64_plust44_plus1_32},
+               {"u8plus2", u8plus2},
+               {"p8minus1", p8minus1},
+               {"n8plus2", n8plus2},
+               {"n8minusone", n8minusone},
+       }
+
+       for _, test := range uint8Tests {
+               var converted uint8
+               switch v := test.input.(type) {
+               case float32:
+                       converted = uint8(v)
+               case float64:
+                       converted = uint8(v)
+               }
+               log(test.name, converted)
+       }
+
+       fmt.Println("\nint8 conversions")
+       int8Tests := []struct {
+               name  string
+               input any
+       }{
+               {"minus1_32", minus1_32},
+               {"minus1_64", minus1_64},
+               {"p32_plus4k_plus1", p32_plus4k_plus1},
+               {"p64_plus4k_plus1", p64_plus4k_plus1},
+               {"n32_minus4k", n32_minus4k},
+               {"n64_minus4k", n64_minus4k},
+               {"inf_32", inf_32},
+               {"inf_64", inf_64},
+               {"ninf_32", ninf_32},
+               {"ninf_64", ninf_64},
+               {"u64_plus4k_plus1_64", u64_plus4k_plus1_64},
+               {"u64_plust44_plus1_32", u64_plust44_plus1_32},
+               {"u8plus2", u8plus2},
+               {"p8minus1", p8minus1},
+               {"n8plus2", n8plus2},
+               {"n8minusone", n8minusone},
+       }
+
+       for _, test := range int8Tests {
+               var converted int8
+               switch v := test.input.(type) {
+               case float32:
+                       converted = int8(v)
+               case float64:
+                       converted = int8(v)
+               }
+               log(test.name, converted)
+       }
+
+}
diff --git a/test/convert5.out b/test/convert5.out
new file mode 100644 (file)
index 0000000..47a8af6
--- /dev/null
@@ -0,0 +1,37 @@
+Below this are 'golden' results to check for consistency across platforms.  Overflow behavior is not necessarily what we want
+
+uint8 conversions
+minus1_32 255
+minus1_64 255
+p32_plus4k_plus1 255
+p64_plus4k_plus1 255
+n32_minus4k 0
+n64_minus4k 0
+inf_32 255
+inf_64 255
+ninf_32 0
+ninf_64 0
+u64_plus4k_plus1_64 255
+u64_plust44_plus1_32 255
+u8plus2 1
+p8minus1 126
+n8plus2 130
+n8minusone 127
+
+int8 conversions
+minus1_32 -1
+minus1_64 -1
+p32_plus4k_plus1 -1
+p64_plus4k_plus1 -1
+n32_minus4k 0
+n64_minus4k 0
+inf_32 -1
+inf_64 -1
+ninf_32 0
+ninf_64 0
+u64_plus4k_plus1_64 -1
+u64_plust44_plus1_32 -1
+u8plus2 1
+p8minus1 126
+n8plus2 -126
+n8minusone 127