From 6be1c09e199a0f32f07e39c2629a5b12a3aec9e2 Mon Sep 17 00:00:00 2001
From: Vladimir Stefanovic
Date: Fri, 10 Nov 2017 18:08:48 +0100
Subject: [PATCH] cmd/compile: use soft-float routines for soft-float targets

Updates #18162 (mostly fixes)

Change-Id: I35bcb8a688bdaa432adb0ddbb73a2f7adda47b9e
Reviewed-on: https://go-review.googlesource.com/37958
Run-TryBot: Brad Fitzpatrick
TryBot-Result: Gobot Gobot
Reviewed-by: Cherry Zhang
---
 src/cmd/compile/internal/gc/go.go         |   7 +-
 src/cmd/compile/internal/gc/main.go       |   6 +
 src/cmd/compile/internal/gc/ssa.go        | 181 ++++++++++++++++++----
 src/cmd/compile/internal/gc/ssa_test.go   |  23 ++-
 src/cmd/compile/internal/gc/subr.go       |  15 ++
 src/cmd/compile/internal/gc/walk.go       |   5 +
 src/cmd/compile/internal/mips/galign.go   |   1 +
 src/cmd/compile/internal/ssa/check.go     |   4 +
 src/cmd/compile/internal/ssa/compile.go   |   3 +
 src/cmd/compile/internal/ssa/config.go    |   1 +
 src/cmd/compile/internal/ssa/softfloat.go |  66 ++++++++
 11 files changed, 271 insertions(+), 41 deletions(-)
 create mode 100644 src/cmd/compile/internal/ssa/softfloat.go

diff --git a/src/cmd/compile/internal/gc/go.go b/src/cmd/compile/internal/gc/go.go
index 58b7cf8e9f..dc94cf4f98 100644
--- a/src/cmd/compile/internal/gc/go.go
+++ b/src/cmd/compile/internal/gc/go.go
@@ -243,9 +243,10 @@ var autogeneratedPos src.XPos
 type Arch struct {
 	LinkArch *obj.LinkArch
 
-	REGSP    int
-	MAXWIDTH int64
-	Use387   bool // should 386 backend use 387 FP instructions instead of sse2.
+	REGSP     int
+	MAXWIDTH  int64
+	Use387    bool // should 386 backend use 387 FP instructions instead of sse2.
+	SoftFloat bool
 
 	PadFrame  func(int64) int64
 	ZeroRange func(*Progs, *obj.Prog, int64, int64, *uint32) *obj.Prog
diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go
index 7fd04e3f08..4e470b6d8d 100644
--- a/src/cmd/compile/internal/gc/main.go
+++ b/src/cmd/compile/internal/gc/main.go
@@ -49,6 +49,7 @@ var (
 	Debug_locationlist int
 	Debug_typecheckinl int
 	Debug_gendwarfinl  int
+	Debug_softfloat    int
 )
 
 // Debug arguments.
@@ -78,6 +79,7 @@ var debugtab = []struct {
 	{"locationlists", "print information about DWARF location list creation", &Debug_locationlist},
 	{"typecheckinl", "eager typechecking of inline function bodies", &Debug_typecheckinl},
 	{"dwarfinl", "print information about DWARF inlined function creation", &Debug_gendwarfinl},
+	{"softfloat", "force compiler to emit soft-float code", &Debug_softfloat},
 }
 
 const debugHelpHeader = `usage: -d arg[,arg]* and arg is <key>[=<value>]
@@ -393,6 +395,10 @@ func Main(archInit func(*Arch)) {
 		dwarf.EnableLogging(Debug_gendwarfinl != 0)
 	}
 
+	if Debug_softfloat != 0 {
+		thearch.SoftFloat = true
+	}
+
 	// enable inlining. for now:
 	//	default: inlining on.  (debug['l'] == 1)
 	//	-l: inlining off  (debug['l'] == 0)
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 0bd5cea73e..dfa2d081d1 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -49,6 +49,11 @@ func initssaconfig() {
 		Float64Ptr: types.NewPtr(types.Types[TFLOAT64]),
 		BytePtrPtr: types.NewPtr(types.NewPtr(types.Types[TUINT8])),
 	}
+
+	if thearch.SoftFloat {
+		softfloatInit()
+	}
+
 	// Generate a few pointer types that are uncommon in the frontend but common in the backend.
 	// Caching is disabled in the backend, so generating these here avoids allocations.
 	_ = types.NewPtr(types.Types[TINTER]) // *interface{}
@@ -68,6 +73,7 @@ func initssaconfig() {
 	if thearch.LinkArch.Name == "386" {
 		ssaConfig.Set387(thearch.Use387)
 	}
+	ssaConfig.SoftFloat = thearch.SoftFloat
 	ssaCaches = make([]ssa.Cache, nBackendWorkers)
 
 	// Set up some runtime functions we'll need to call.
@@ -139,6 +145,7 @@ func buildssa(fn *Node, worker int) *ssa.Func {
 	}
 	s.exitCode = fn.Func.Exit
 	s.panics = map[funcLine]*ssa.Block{}
+	s.softFloat = s.config.SoftFloat
 
 	if name == os.Getenv("GOSSAFUNC") {
 		s.f.HTMLWriter = ssa.NewHTMLWriter("ssa.html", s.f.Frontend(), name)
@@ -310,6 +317,7 @@ type state struct {
 
 	cgoUnsafeArgs bool
 	hasdefer      bool // whether the function contains a defer statement
+	softFloat     bool
 }
 
 type funcLine struct {
@@ -553,6 +561,25 @@ func (s *state) constOffPtrSP(t *types.Type, c int64) *ssa.Value {
 	return s.f.ConstOffPtrSP(s.peekPos(), t, c, s.sp)
 }
 
+// newValueOrSfCall* are wrappers around newValue*, which may create a call to a
+// soft-float runtime function instead (when emitting soft-float code).
+func (s *state) newValueOrSfCall1(op ssa.Op, t *types.Type, arg *ssa.Value) *ssa.Value {
+	if s.softFloat {
+		if c, ok := s.sfcall(op, arg); ok {
+			return c
+		}
+	}
+	return s.newValue1(op, t, arg)
+}
+func (s *state) newValueOrSfCall2(op ssa.Op, t *types.Type, arg0, arg1 *ssa.Value) *ssa.Value {
+	if s.softFloat {
+		if c, ok := s.sfcall(op, arg0, arg1); ok {
+			return c
+		}
+	}
+	return s.newValue2(op, t, arg0, arg1)
+}
+
 // stmtList converts the statement list n to SSA and adds it to s.
 func (s *state) stmtList(l Nodes) {
 	for _, n := range l.Slice() {
@@ -1689,18 +1716,18 @@ func (s *state) expr(n *Node) *ssa.Value {
 
 		if ft.IsFloat() || tt.IsFloat() {
 			conv, ok := fpConvOpToSSA[twoTypes{s.concreteEtype(ft), s.concreteEtype(tt)}]
-			if s.config.RegSize == 4 && thearch.LinkArch.Family != sys.MIPS {
+			if s.config.RegSize == 4 && thearch.LinkArch.Family != sys.MIPS && !s.softFloat {
 				if conv1, ok1 := fpConvOpToSSA32[twoTypes{s.concreteEtype(ft), s.concreteEtype(tt)}]; ok1 {
 					conv = conv1
 				}
 			}
-			if thearch.LinkArch.Family == sys.ARM64 {
+			if thearch.LinkArch.Family == sys.ARM64 || s.softFloat {
 				if conv1, ok1 := uint64fpConvOpToSSA[twoTypes{s.concreteEtype(ft), s.concreteEtype(tt)}]; ok1 {
 					conv = conv1
 				}
 			}
 
-			if thearch.LinkArch.Family == sys.MIPS {
+			if thearch.LinkArch.Family == sys.MIPS && !s.softFloat {
 				if ft.Size() == 4 && ft.IsInteger() && !ft.IsSigned() {
 					// tt is float32 or float64, and ft is also unsigned
 					if tt.Size() == 4 {
@@ -1731,12 +1758,12 @@ func (s *state) expr(n *Node) *ssa.Value {
 				if op2 == ssa.OpCopy {
 					return x
 				}
-				return s.newValue1(op2, n.Type, x)
+				return s.newValueOrSfCall1(op2, n.Type, x)
 			}
 			if op2 == ssa.OpCopy {
-				return s.newValue1(op1, n.Type, x)
+				return s.newValueOrSfCall1(op1, n.Type, x)
 			}
-			return s.newValue1(op2, n.Type, s.newValue1(op1, types.Types[it], x))
+			return s.newValueOrSfCall1(op2, n.Type, s.newValueOrSfCall1(op1, types.Types[it], x))
 		}
 		// Tricky 64-bit unsigned cases.
 		if ft.IsInteger() {
@@ -1781,8 +1808,8 @@ func (s *state) expr(n *Node) *ssa.Value {
 			ftp := floatForComplex(ft)
 			ttp := floatForComplex(tt)
 			return s.newValue2(ssa.OpComplexMake, tt,
-				s.newValue1(op, ttp, s.newValue1(ssa.OpComplexReal, ftp, x)),
-				s.newValue1(op, ttp, s.newValue1(ssa.OpComplexImag, ftp, x)))
+				s.newValueOrSfCall1(op, ttp, s.newValue1(ssa.OpComplexReal, ftp, x)),
+				s.newValueOrSfCall1(op, ttp, s.newValue1(ssa.OpComplexImag, ftp, x)))
 		}
 
 		s.Fatalf("unhandled OCONV %s -> %s", n.Left.Type.Etype, n.Type.Etype)
@@ -1799,8 +1826,8 @@ func (s *state) expr(n *Node) *ssa.Value {
 		if n.Left.Type.IsComplex() {
 			pt := floatForComplex(n.Left.Type)
 			op := s.ssaOp(OEQ, pt)
-			r := s.newValue2(op, types.Types[TBOOL], s.newValue1(ssa.OpComplexReal, pt, a), s.newValue1(ssa.OpComplexReal, pt, b))
-			i := s.newValue2(op, types.Types[TBOOL], s.newValue1(ssa.OpComplexImag, pt, a), s.newValue1(ssa.OpComplexImag, pt, b))
+			r := s.newValueOrSfCall2(op, types.Types[TBOOL], s.newValue1(ssa.OpComplexReal, pt, a), s.newValue1(ssa.OpComplexReal, pt, b))
+			i := s.newValueOrSfCall2(op, types.Types[TBOOL], s.newValue1(ssa.OpComplexImag, pt, a), s.newValue1(ssa.OpComplexImag, pt, b))
 			c := s.newValue2(ssa.OpAndB, types.Types[TBOOL], r, i)
 			switch n.Op {
 			case OEQ:
@@ -1811,6 +1838,9 @@ func (s *state) expr(n *Node) *ssa.Value {
 				s.Fatalf("ordered complex compare %v", n.Op)
 			}
 		}
+		if n.Left.Type.IsFloat() {
+			return s.newValueOrSfCall2(s.ssaOp(n.Op, n.Left.Type), types.Types[TBOOL], a, b)
+		}
 		return s.newValue2(s.ssaOp(n.Op, n.Left.Type), types.Types[TBOOL], a, b)
 	case OMUL:
 		a := s.expr(n.Left)
@@ -1828,22 +1858,27 @@ func (s *state) expr(n *Node) *ssa.Value {
 			bimag := s.newValue1(ssa.OpComplexImag, pt, b)
 
 			if pt != wt { // Widen for calculation
-				areal = s.newValue1(ssa.OpCvt32Fto64F, wt, areal)
-				breal = s.newValue1(ssa.OpCvt32Fto64F, wt, breal)
-				aimag = s.newValue1(ssa.OpCvt32Fto64F, wt, aimag)
-				bimag = s.newValue1(ssa.OpCvt32Fto64F, wt, bimag)
+				areal = s.newValueOrSfCall1(ssa.OpCvt32Fto64F, wt, areal)
+				breal = s.newValueOrSfCall1(ssa.OpCvt32Fto64F, wt, breal)
+				aimag = s.newValueOrSfCall1(ssa.OpCvt32Fto64F, wt, aimag)
+				bimag = s.newValueOrSfCall1(ssa.OpCvt32Fto64F, wt, bimag)
 			}
 
-			xreal := s.newValue2(subop, wt, s.newValue2(mulop, wt, areal, breal), s.newValue2(mulop, wt, aimag, bimag))
-			ximag := s.newValue2(addop, wt, s.newValue2(mulop, wt, areal, bimag), s.newValue2(mulop, wt, aimag, breal))
+			xreal := s.newValueOrSfCall2(subop, wt, s.newValueOrSfCall2(mulop, wt, areal, breal), s.newValueOrSfCall2(mulop, wt, aimag, bimag))
+			ximag := s.newValueOrSfCall2(addop, wt, s.newValueOrSfCall2(mulop, wt, areal, bimag), s.newValueOrSfCall2(mulop, wt, aimag, breal))
 
 			if pt != wt { // Narrow to store back
-				xreal = s.newValue1(ssa.OpCvt64Fto32F, pt, xreal)
-				ximag = s.newValue1(ssa.OpCvt64Fto32F, pt, ximag)
+				xreal = s.newValueOrSfCall1(ssa.OpCvt64Fto32F, pt, xreal)
+				ximag = s.newValueOrSfCall1(ssa.OpCvt64Fto32F, pt, ximag)
 			}
 
 			return s.newValue2(ssa.OpComplexMake, n.Type, xreal, ximag)
 		}
+
+		if n.Type.IsFloat() {
+			return s.newValueOrSfCall2(s.ssaOp(n.Op, n.Type), a.Type, a, b)
+		}
+
 		return s.newValue2(s.ssaOp(n.Op, n.Type), a.Type, a, b)
 
 	case ODIV:
@@ -1866,31 +1901,31 @@ func (s *state) expr(n *Node) *ssa.Value {
 			bimag := s.newValue1(ssa.OpComplexImag, pt, b)
 
 			if pt != wt { // Widen for calculation
-				areal = s.newValue1(ssa.OpCvt32Fto64F, wt, areal)
-				breal = s.newValue1(ssa.OpCvt32Fto64F, wt, breal)
-				aimag = s.newValue1(ssa.OpCvt32Fto64F, wt, aimag)
-				bimag = s.newValue1(ssa.OpCvt32Fto64F, wt, bimag)
+				areal = s.newValueOrSfCall1(ssa.OpCvt32Fto64F, wt, areal)
+				breal = s.newValueOrSfCall1(ssa.OpCvt32Fto64F, wt, breal)
+				aimag = s.newValueOrSfCall1(ssa.OpCvt32Fto64F, wt, aimag)
+				bimag = s.newValueOrSfCall1(ssa.OpCvt32Fto64F, wt, bimag)
 			}
 
-			denom := s.newValue2(addop, wt, s.newValue2(mulop, wt, breal, breal), s.newValue2(mulop, wt, bimag, bimag))
-			xreal := s.newValue2(addop, wt, s.newValue2(mulop, wt, areal, breal), s.newValue2(mulop, wt, aimag, bimag))
-			ximag := s.newValue2(subop, wt, s.newValue2(mulop, wt, aimag, breal), s.newValue2(mulop, wt, areal, bimag))
+			denom := s.newValueOrSfCall2(addop, wt, s.newValueOrSfCall2(mulop, wt, breal, breal), s.newValueOrSfCall2(mulop, wt, bimag, bimag))
+			xreal := s.newValueOrSfCall2(addop, wt, s.newValueOrSfCall2(mulop, wt, areal, breal), s.newValueOrSfCall2(mulop, wt, aimag, bimag))
+			ximag := s.newValueOrSfCall2(subop, wt, s.newValueOrSfCall2(mulop, wt, aimag, breal), s.newValueOrSfCall2(mulop, wt, areal, bimag))
 
 			// TODO not sure if this is best done in wide precision or narrow
 			// Double-rounding might be an issue.
 			// Note that the pre-SSA implementation does the entire calculation
 			// in wide format, so wide is compatible.
-			xreal = s.newValue2(divop, wt, xreal, denom)
-			ximag = s.newValue2(divop, wt, ximag, denom)
+			xreal = s.newValueOrSfCall2(divop, wt, xreal, denom)
+			ximag = s.newValueOrSfCall2(divop, wt, ximag, denom)
 
 			if pt != wt { // Narrow to store back
-				xreal = s.newValue1(ssa.OpCvt64Fto32F, pt, xreal)
-				ximag = s.newValue1(ssa.OpCvt64Fto32F, pt, ximag)
+				xreal = s.newValueOrSfCall1(ssa.OpCvt64Fto32F, pt, xreal)
+				ximag = s.newValueOrSfCall1(ssa.OpCvt64Fto32F, pt, ximag)
 			}
 			return s.newValue2(ssa.OpComplexMake, n.Type, xreal, ximag)
 		}
 		if n.Type.IsFloat() {
-			return s.newValue2(s.ssaOp(n.Op, n.Type), a.Type, a, b)
+			return s.newValueOrSfCall2(s.ssaOp(n.Op, n.Type), a.Type, a, b)
 		}
 		return s.intDivide(n, a, b)
 	case OMOD:
@@ -1904,8 +1939,11 @@ func (s *state) expr(n *Node) *ssa.Value {
 			pt := floatForComplex(n.Type)
 			op := s.ssaOp(n.Op, pt)
 			return s.newValue2(ssa.OpComplexMake, n.Type,
-				s.newValue2(op, pt, s.newValue1(ssa.OpComplexReal, pt, a), s.newValue1(ssa.OpComplexReal, pt, b)),
-				s.newValue2(op, pt, s.newValue1(ssa.OpComplexImag, pt, a), s.newValue1(ssa.OpComplexImag, pt, b)))
+				s.newValueOrSfCall2(op, pt, s.newValue1(ssa.OpComplexReal, pt, a), s.newValue1(ssa.OpComplexReal, pt, b)),
+				s.newValueOrSfCall2(op, pt, s.newValue1(ssa.OpComplexImag, pt, a), s.newValue1(ssa.OpComplexImag, pt, b)))
+		}
+		if n.Type.IsFloat() {
+			return s.newValueOrSfCall2(s.ssaOp(n.Op, n.Type), a.Type, a, b)
 		}
 		return s.newValue2(s.ssaOp(n.Op, n.Type), a.Type, a, b)
 	case OAND, OOR, OXOR:
@@ -2564,6 +2602,79 @@ const (
 	callGo
 )
 
+type sfRtCallDef struct {
+	rtfn  *obj.LSym
+	rtype types.EType
+}
+
+var softFloatOps map[ssa.Op]sfRtCallDef
+
+func softfloatInit() {
+	// Some of these operations get transformed by sfcall.
+	softFloatOps = map[ssa.Op]sfRtCallDef{
+		ssa.OpAdd32F: sfRtCallDef{sysfunc("fadd32"), TFLOAT32},
+		ssa.OpAdd64F: sfRtCallDef{sysfunc("fadd64"), TFLOAT64},
+		ssa.OpSub32F: sfRtCallDef{sysfunc("fadd32"), TFLOAT32},
+		ssa.OpSub64F: sfRtCallDef{sysfunc("fadd64"), TFLOAT64},
+		ssa.OpMul32F: sfRtCallDef{sysfunc("fmul32"), TFLOAT32},
+		ssa.OpMul64F: sfRtCallDef{sysfunc("fmul64"), TFLOAT64},
+		ssa.OpDiv32F: sfRtCallDef{sysfunc("fdiv32"), TFLOAT32},
+		ssa.OpDiv64F: sfRtCallDef{sysfunc("fdiv64"), TFLOAT64},
+
+		ssa.OpEq64F:      sfRtCallDef{sysfunc("feq64"), TBOOL},
+		ssa.OpEq32F:      sfRtCallDef{sysfunc("feq32"), TBOOL},
+		ssa.OpNeq64F:     sfRtCallDef{sysfunc("feq64"), TBOOL},
+		ssa.OpNeq32F:     sfRtCallDef{sysfunc("feq32"), TBOOL},
+		ssa.OpLess64F:    sfRtCallDef{sysfunc("fgt64"), TBOOL},
+		ssa.OpLess32F:    sfRtCallDef{sysfunc("fgt32"), TBOOL},
+		ssa.OpGreater64F: sfRtCallDef{sysfunc("fgt64"), TBOOL},
+		ssa.OpGreater32F: sfRtCallDef{sysfunc("fgt32"), TBOOL},
+		ssa.OpLeq64F:     sfRtCallDef{sysfunc("fge64"), TBOOL},
+		ssa.OpLeq32F:     sfRtCallDef{sysfunc("fge32"), TBOOL},
+		ssa.OpGeq64F:     sfRtCallDef{sysfunc("fge64"), TBOOL},
+		ssa.OpGeq32F:     sfRtCallDef{sysfunc("fge32"), TBOOL},
+
+		ssa.OpCvt32to32F:  sfRtCallDef{sysfunc("fint32to32"), TFLOAT32},
+		ssa.OpCvt32Fto32:  sfRtCallDef{sysfunc("f32toint32"), TINT32},
+		ssa.OpCvt64to32F:  sfRtCallDef{sysfunc("fint64to32"), TFLOAT32},
+		ssa.OpCvt32Fto64:  sfRtCallDef{sysfunc("f32toint64"), TINT64},
+		ssa.OpCvt64Uto32F: sfRtCallDef{sysfunc("fuint64to32"), TFLOAT32},
+		ssa.OpCvt32Fto64U: sfRtCallDef{sysfunc("f32touint64"), TUINT64},
+		ssa.OpCvt32to64F:  sfRtCallDef{sysfunc("fint32to64"), TFLOAT64},
+		ssa.OpCvt64Fto32:  sfRtCallDef{sysfunc("f64toint32"), TINT32},
+		ssa.OpCvt64to64F:  sfRtCallDef{sysfunc("fint64to64"), TFLOAT64},
+		ssa.OpCvt64Fto64:  sfRtCallDef{sysfunc("f64toint64"), TINT64},
+		ssa.OpCvt64Uto64F: sfRtCallDef{sysfunc("fuint64to64"), TFLOAT64},
+		ssa.OpCvt64Fto64U: sfRtCallDef{sysfunc("f64touint64"), TUINT64},
+		ssa.OpCvt32Fto64F: sfRtCallDef{sysfunc("f32to64"), TFLOAT64},
+		ssa.OpCvt64Fto32F: sfRtCallDef{sysfunc("f64to32"), TFLOAT32},
+	}
+}
+
+// TODO: do not emit sfcall if operation can be optimized to constant in later
+// opt phase
+func (s *state) sfcall(op ssa.Op, args ...*ssa.Value) (*ssa.Value, bool) {
+	if callDef, ok := softFloatOps[op]; ok {
+		switch op {
+		case ssa.OpLess32F,
+			ssa.OpLess64F,
+			ssa.OpLeq32F,
+			ssa.OpLeq64F:
+			args[0], args[1] = args[1], args[0]
+		case ssa.OpSub32F,
+			ssa.OpSub64F:
+			args[1] = s.newValue1(s.ssaOp(OMINUS, types.Types[callDef.rtype]), args[1].Type, args[1])
+		}
+
+		result := s.rtcall(callDef.rtfn, true, []*types.Type{types.Types[callDef.rtype]}, args...)[0]
+		if op == ssa.OpNeq32F || op == ssa.OpNeq64F {
+			result = s.newValue1(ssa.OpNot, result.Type, result)
+		}
+		return result, true
+	}
+	return nil, false
+}
+
 var intrinsics map[intrinsicKey]intrinsicBuilder
 
 // An intrinsicBuilder converts a call node n into an ssa value that
@@ -3134,6 +3245,12 @@ func findIntrinsic(sym *types.Sym) intrinsicBuilder {
 		// We can't intrinsify them.
 		return nil
 	}
 
+	// Skip intrinsifying math functions (which may contain hard-float
+	// instructions) when soft-float
+	if thearch.SoftFloat && pkg == "math" {
+		return nil
+	}
+
 	fn := sym.Name
 	return intrinsics[intrinsicKey{thearch.LinkArch.Arch, pkg, fn}]
 }
diff --git a/src/cmd/compile/internal/gc/ssa_test.go b/src/cmd/compile/internal/gc/ssa_test.go
index 28670542e7..13fb98b276 100644
--- a/src/cmd/compile/internal/gc/ssa_test.go
+++ b/src/cmd/compile/internal/gc/ssa_test.go
@@ -18,20 +18,27 @@ import (
 
 // TODO: move all these tests elsewhere?
 // Perhaps teach test/run.go how to run them with a new action verb.
-func runTest(t *testing.T, filename string) {
+func runTest(t *testing.T, filename string, flags ...string) {
 	t.Parallel()
-	doTest(t, filename, "run")
+	doTest(t, filename, "run", flags...)
 }
-func buildTest(t *testing.T, filename string) {
+func buildTest(t *testing.T, filename string, flags ...string) {
 	t.Parallel()
-	doTest(t, filename, "build")
+	doTest(t, filename, "build", flags...)
 }
-func doTest(t *testing.T, filename string, kind string) {
+func doTest(t *testing.T, filename string, kind string, flags ...string) {
 	testenv.MustHaveGoBuild(t)
 	gotool := testenv.GoToolPath(t)
 	var stdout, stderr bytes.Buffer
-	cmd := exec.Command(gotool, kind, "-gcflags=-d=ssa/check/on", filepath.Join("testdata", filename))
+	args := []string{kind}
+	if len(flags) == 0 {
+		args = append(args, "-gcflags=-d=ssa/check/on")
+	} else {
+		args = append(args, flags...)
+	}
+	args = append(args, filepath.Join("testdata", filename))
+	cmd := exec.Command(gotool, args...)
 	cmd.Stdout = &stdout
 	cmd.Stderr = &stderr
 	err := cmd.Run()
@@ -113,6 +120,10 @@ func TestArithmetic(t *testing.T) { runTest(t, "arith.go") }
 
 // TestFP tests that both backends have the same result for floating point expressions.
 func TestFP(t *testing.T) { runTest(t, "fp.go") }
 
+func TestFPSoftFloat(t *testing.T) {
+	runTest(t, "fp.go", "-gcflags=-d=softfloat,ssa/check/on")
+}
+
 // TestArithmeticBoundary tests boundary results for arithmetic operations.
 func TestArithmeticBoundary(t *testing.T) { runTest(t, "arithBoundary.go") }
diff --git a/src/cmd/compile/internal/gc/subr.go b/src/cmd/compile/internal/gc/subr.go
index 0735a0c408..a45c15a44e 100644
--- a/src/cmd/compile/internal/gc/subr.go
+++ b/src/cmd/compile/internal/gc/subr.go
@@ -1165,6 +1165,21 @@ func calcHasCall(n *Node) bool {
 		// These ops might panic, make sure they are done
 		// before we start marshaling args for a call. See issue 16760.
 		return true
+
+	// When using soft-float, these ops might be rewritten to function calls
+	// so we ensure they are evaluated first.
+	case OADD, OSUB, OMINUS:
+		if thearch.SoftFloat && (isFloat[n.Type.Etype] || isComplex[n.Type.Etype]) {
+			return true
+		}
+	case OLT, OEQ, ONE, OLE, OGE, OGT:
+		if thearch.SoftFloat && (isFloat[n.Left.Type.Etype] || isComplex[n.Left.Type.Etype]) {
+			return true
+		}
+	case OCONV:
+		if thearch.SoftFloat && ((isFloat[n.Type.Etype] || isComplex[n.Type.Etype]) || (isFloat[n.Left.Type.Etype] || isComplex[n.Left.Type.Etype])) {
+			return true
+		}
 	}
 
 	if n.Left != nil && n.Left.HasCall() {
diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go
index 55e9211977..34c73acce0 100644
--- a/src/cmd/compile/internal/gc/walk.go
+++ b/src/cmd/compile/internal/gc/walk.go
@@ -988,6 +988,10 @@ opswitch:
 		n = walkexpr(n, init)
 
 	case OCONV, OCONVNOP:
+		if thearch.SoftFloat {
+			// For the soft-float case, ssa.go handles these conversions.
+			goto oconv_walkexpr
+		}
 		switch thearch.LinkArch.Family {
 		case sys.ARM, sys.MIPS:
 			if n.Left.Type.IsFloat() {
@@ -1041,6 +1045,7 @@ opswitch:
 			}
 		}
 
+	oconv_walkexpr:
 		n.Left = walkexpr(n.Left, init)
 
 	case OANDNOT:
diff --git a/src/cmd/compile/internal/mips/galign.go b/src/cmd/compile/internal/mips/galign.go
index 77ec78aabf..f207a17bbf 100644
--- a/src/cmd/compile/internal/mips/galign.go
+++ b/src/cmd/compile/internal/mips/galign.go
@@ -18,6 +18,7 @@ func Init(arch *gc.Arch) {
 	}
 	arch.REGSP = mips.REGSP
 	arch.MAXWIDTH = (1 << 31) - 1
+	arch.SoftFloat = (objabi.GOMIPS == "softfloat")
 	arch.ZeroRange = zerorange
 	arch.ZeroAuto = zeroAuto
 	arch.Ginsnop = ginsnop
diff --git a/src/cmd/compile/internal/ssa/check.go b/src/cmd/compile/internal/ssa/check.go
index d0d1a7b912..1c2fcd7948 100644
--- a/src/cmd/compile/internal/ssa/check.go
+++ b/src/cmd/compile/internal/ssa/check.go
@@ -203,6 +203,10 @@ func checkFunc(f *Func) {
 				}
 			}
 
+			if f.RegAlloc != nil && f.Config.SoftFloat && v.Type.IsFloat() {
+				f.Fatalf("unexpected floating-point type %v", v.LongString())
+			}
+
 			// TODO: check for cycles in values
 			// TODO: check type
 		}
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go
index 608037db74..8a2e358c11 100644
--- a/src/cmd/compile/internal/ssa/compile.go
+++ b/src/cmd/compile/internal/ssa/compile.go
@@ -344,6 +344,7 @@ var passes = [...]pass{
 	{name: "prove", fn: prove},
 	{name: "loopbce", fn: loopbce},
 	{name: "decompose builtin", fn: decomposeBuiltIn, required: true},
+	{name: "softfloat", fn: softfloat, required: true},
 	{name: "late opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules
 	{name: "generic deadcode", fn: deadcode},
 	{name: "check bce", fn: checkbce},
@@ -413,6 +414,8 @@ var passOrder = [...]constraint{
 	{"generic deadcode", "check bce"},
 	// don't run optimization pass until we've decomposed builtin objects
 	{"decompose builtin", "late opt"},
+	// decompose builtin is the last pass that may introduce new float ops, so run softfloat after it
+	{"decompose builtin", "softfloat"},
 	// don't layout blocks until critical edges have been removed
 	{"critical", "layout"},
 	// regalloc requires the removal of all critical edges
diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go
index 61ecac4e75..ae6caeea9e 100644
--- a/src/cmd/compile/internal/ssa/config.go
+++ b/src/cmd/compile/internal/ssa/config.go
@@ -36,6 +36,7 @@ type Config struct {
 	useSSE          bool          // Use SSE for non-float operations
 	nacl            bool          // GOOS=nacl
 	use387          bool          // GO386=387
+	SoftFloat       bool          //
 	NeedsFpScratch  bool          // No direct move between GP and FP register sets
 	BigEndian       bool          //
 	sparsePhiCutoff uint64        // Sparse phi location algorithm used above this #blocks*#variables score
diff --git a/src/cmd/compile/internal/ssa/softfloat.go b/src/cmd/compile/internal/ssa/softfloat.go
new file mode 100644
index 0000000000..39829b046c
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/softfloat.go
@@ -0,0 +1,66 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import "math"
+
+func softfloat(f *Func) {
+	if !f.Config.SoftFloat {
+		return
+	}
+	newInt64 := false
+
+	for _, b := range f.Blocks {
+		for _, v := range b.Values {
+			if v.Type.IsFloat() {
+				switch v.Op {
+				case OpPhi, OpLoad, OpArg:
+					if v.Type.Size() == 4 {
+						v.Type = f.Config.Types.UInt32
+					} else {
+						v.Type = f.Config.Types.UInt64
+					}
+				case OpConst32F:
+					v.Op = OpConst32
+					v.Type = f.Config.Types.UInt32
+					v.AuxInt = int64(int32(math.Float32bits(i2f32(v.AuxInt))))
+				case OpConst64F:
+					v.Op = OpConst64
+					v.Type = f.Config.Types.UInt64
+				case OpNeg32F:
+					arg0 := v.Args[0]
+					v.reset(OpXor32)
+					v.Type = f.Config.Types.UInt32
+					v.AddArg(arg0)
+					mask := v.Block.NewValue0(v.Pos, OpConst32, v.Type)
+					mask.AuxInt = -0x80000000
+					v.AddArg(mask)
+				case OpNeg64F:
+					arg0 := v.Args[0]
+					v.reset(OpXor64)
+					v.Type = f.Config.Types.UInt64
+					v.AddArg(arg0)
+					mask := v.Block.NewValue0(v.Pos, OpConst64, v.Type)
+					mask.AuxInt = -0x8000000000000000
+					v.AddArg(mask)
+				case OpRound32F:
+					v.Op = OpCopy
+					v.Type = f.Config.Types.UInt32
+				case OpRound64F:
+					v.Op = OpCopy
+					v.Type = f.Config.Types.UInt64
+				}
+				newInt64 = newInt64 || v.Type.Size() == 8
+			}
+		}
+	}
+
+	if newInt64 && f.Config.RegSize == 4 {
+		// On 32bit arch, decompose Uint64 introduced in the switch above.
+		decomposeBuiltIn(f)
+		applyRewrite(f, rewriteBlockdec64, rewriteValuedec64)
+	}
+
+}
-- 
2.50.0
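
A note on the operator substitutions in sfcall: the softFloatOps table
deliberately maps several SSA ops onto a smaller set of runtime routines.
OpSub32F/OpSub64F point at fadd32/fadd64 because sfcall negates the second
argument (a - b == a + (-b)); OpLess*F reuse fgt* and OpLeq*F reuse fge* with
swapped arguments (a < b iff b > a); OpNeq*F reuse feq* and invert the result
with OpNot. These identities hold under IEEE 754 for all inputs, including
NaNs and signed zeros. The sketch below spot-checks them in plain Go, using
hardware floats to stand in for the runtime's soft-float routines (which
implement the same IEEE semantics); the helper name `same` is mine, not from
the patch, and it treats any two NaN results as equivalent since NaN payload
propagation may differ between a-b and a+(-b).

    package main

    import (
    	"fmt"
    	"math"
    )

    func main() {
    	// same reports whether x and y are the same IEEE value, bit for bit,
    	// treating any two NaNs as equivalent.
    	same := func(x, y float64) bool {
    		return math.Float64bits(x) == math.Float64bits(y) ||
    			(math.IsNaN(x) && math.IsNaN(y))
    	}

    	vals := []float64{0, math.Copysign(0, -1), 1.5, -2.25,
    		math.Inf(1), math.Inf(-1), math.NaN()}
    	for _, a := range vals {
    		for _, b := range vals {
    			// OpSub*F -> fadd* with negated second operand.
    			if !same(a-b, a+(-b)) {
    				fmt.Println("sub identity failed:", a, b)
    			}
    			// OpLess*F -> fgt* with swapped operands; OpLeq*F -> fge* likewise.
    			if (a < b) != (b > a) || (a <= b) != (b >= a) {
    				fmt.Println("compare identity failed:", a, b)
    			}
    			// OpNeq*F -> feq* followed by OpNot.
    			if (a != b) != !(a == b) {
    				fmt.Println("neq identity failed:", a, b)
    			}
    		}
    	}
    	fmt.Println("done")
    }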
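
On the softfloat SSA pass itself: once float values are retyped as integer
bit patterns, negation needs no runtime call, because flipping the IEEE sign
bit is a plain integer XOR; that is exactly the OpNeg32F/OpNeg64F rewrite,
and it is also why OpRound32F/OpRound64F (which exist to force values to
their nominal precision on hard-float targets such as the 387) degenerate to
copies. The mask constants are written as -0x80000000 and
-0x8000000000000000 because AuxInt stores sign-extended signed values. A
minimal sketch of the same trick in plain Go (negate64 is an illustrative
name, not part of the patch):

    package main

    import (
    	"fmt"
    	"math"
    )

    // negate64 flips the IEEE 754 sign bit, mirroring the OpNeg64F -> OpXor64
    // rewrite in the softfloat pass (OpNeg32F uses the 32-bit mask 0x80000000).
    func negate64(x float64) float64 {
    	return math.Float64frombits(math.Float64bits(x) ^ 0x8000000000000000)
    }

    func main() {
    	for _, x := range []float64{1.5, -2.25, 0, math.Inf(1), math.NaN()} {
    		fmt.Println(negate64(x), -x) // same value, including -0
    	}
    }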
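
Finally, the constant rewrites: OpConst32F's AuxInt holds the constant as a
float64 (hence the i2f32 decode), and the pass re-encodes it as the
sign-extended float32 bit pattern, which is what
int64(int32(math.Float32bits(i2f32(v.AuxInt)))) computes; OpConst64F changes
only op and type because its AuxInt already holds the raw float64 bits. A
sketch of that encoding round trip in plain Go:

    package main

    import (
    	"fmt"
    	"math"
    )

    func main() {
    	c := float32(-1.5)

    	// What the pass computes for OpConst32F: the IEEE bit pattern of the
    	// float32 value, sign-extended into the 64-bit AuxInt field.
    	bits := math.Float32bits(c)  // 0xbfc00000
    	auxInt := int64(int32(bits)) // -0x40400000 once sign-extended

    	fmt.Printf("bits=%#x auxInt=%#x\n", bits, auxInt)

    	// Decoding the AuxInt encoding recovers the original constant.
    	back := math.Float32frombits(uint32(auxInt))
    	fmt.Println(back == c) // true
    }

To exercise the whole path on any target, the new -d=softfloat debug flag
forces soft-float code generation (TestFPSoftFloat above passes
-gcflags=-d=softfloat,ssa/check/on), while on MIPS it is enabled for real by
GOMIPS=softfloat, as wired up in galign.go.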