From: David Chase Date: Fri, 21 Oct 2022 22:10:23 +0000 (-0400) Subject: cmd/compile: add debug-hash flag for fused-multiply-add X-Git-Tag: go1.20rc1~440 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=667c53e1599fbc61b48e3ddc95e2361b8c8fb8a5;p=gostls13.git cmd/compile: add debug-hash flag for fused-multiply-add This adds a -d debug flag "fmahash" for hashcode search for floating point architecture-dependent problems. This variable has no effect on architectures w/o fused-multiply-add. This was rebased onto the GOSSAHASH renovation so that this could have its own dedicated environment variable, and so that it would be cheap (a nil check) to check it in the normal case. Includes a basic test of the trigger plumbing. Sample use (on arm64, ppc64le, s390x): % GOCOMPILEDEBUG=fmahash=001110110 \ go build -o foo cmd/compile/internal/ssa/testdata/fma.go fmahash triggered main.main:24 101111101101111001110110 GOFMAHASH triggered main.main:20 010111010000101110111011 1.0000000000000002 1.0000000000000004 -2.220446049250313e-16 exit status 1 The intended use is in conjunction with github.com/dr2chase/gossahash, which will probably acquire a flag "-fma" to streamline its use. This tool+use was inspired by an ad hoc use of this technique "in anger" to debug this very problem. This is also a dry-run for using this same technique to identify code sensitive to loop variable lifetime/capture, should we make that change. Example intended use, with current search tool (using old environment variable), for a test example: gossahash -e GOFMAHASH GOMAGIC=GOFMAHASH go run fma.go Trying go args=[...], env=[GOFMAHASH=1 GOMAGIC=GOFMAHASH] go failed (81 distinct triggers): exit status 1 Trying go args=[...], env=[GOFMAHASH=11 GOMAGIC=GOFMAHASH] go failed (39 distinct triggers): exit status 1 Trying go args=[...], env=[GOFMAHASH=011 GOMAGIC=GOFMAHASH] go failed (18 distinct triggers): exit status 1 Trying go args=[...], env=[GOFMAHASH=0011 GOMAGIC=GOFMAHASH] Trying go args=[...], env=[GOFMAHASH=1011 GOMAGIC=GOFMAHASH] ... Trying go args=[...], env=[GOFMAHASH=0110111011 GOMAGIC=GOFMAHASH] Trying go args=[...], env=[GOFMAHASH=1110111011 GOMAGIC=GOFMAHASH] go failed (2 distinct triggers): exit status 1 Trigger string is 'GOFMAHASH triggered math.qzero:427 111111101010011110111011', repeated 6 times Trigger string is 'GOFMAHASH triggered main.main:20 010111010000101110111011', repeated 1 times Trying go args=[...], env=[GOFMAHASH=01110111011 GOMAGIC=GOFMAHASH] go failed (1 distinct triggers): exit status 1 Trigger string is 'GOFMAHASH triggered main.main:20 010111010000101110111011', repeated 1 times Review GSHS_LAST_FAIL.0.log for failing run FINISHED, suggest this command line for debugging: GOSSAFUNC='main.main:20 010111010000101110111011' \ GOFMAHASH=01110111011 GOMAGIC=GOFMAHASH go run fma.go Change-Id: Ifa22dd8f1c37c18fc8a4f7c396345a364bc367d5 Reviewed-on: https://go-review.googlesource.com/c/go/+/394754 TryBot-Result: Gopher Robot Run-TryBot: David Chase Reviewed-by: Matthew Dempsky --- diff --git a/src/cmd/compile/internal/base/debug.go b/src/cmd/compile/internal/base/debug.go index 6cb7a54cad..25a5c8c98f 100644 --- a/src/cmd/compile/internal/base/debug.go +++ b/src/cmd/compile/internal/base/debug.go @@ -25,6 +25,7 @@ type DebugFlags struct { DumpPtrs int `help:"show Node pointers values in dump output"` DwarfInl int `help:"print information about DWARF inlined function creation"` Export int `help:"print export data"` + Fmahash string `help:"hash value for use in debugging platform-dependent multiply-add use" concurrent:"ok"` GCProg int `help:"print dump of GC programs"` Gossahash string `help:"hash value for use in debugging the compiler"` InlFuncsWithClosures int `help:"allow functions with closures to be inlined"` diff --git a/src/cmd/compile/internal/base/flag.go b/src/cmd/compile/internal/base/flag.go index 98cfc189ae..6d2847bc06 100644 --- a/src/cmd/compile/internal/base/flag.go +++ b/src/cmd/compile/internal/base/flag.go @@ -190,6 +190,10 @@ func ParseFlags() { hashDebug = NewHashDebug("gosshash", Debug.Gossahash, nil) } + if Debug.Fmahash != "" { + FmaHash = NewHashDebug("fmahash", Debug.Fmahash, nil) + } + if Flag.MSan && !platform.MSanSupported(buildcfg.GOOS, buildcfg.GOARCH) { log.Fatalf("%s/%s does not support -msan", buildcfg.GOOS, buildcfg.GOARCH) } diff --git a/src/cmd/compile/internal/base/hashdebug.go b/src/cmd/compile/internal/base/hashdebug.go index 08c4fbcc00..c93d042f71 100644 --- a/src/cmd/compile/internal/base/hashdebug.go +++ b/src/cmd/compile/internal/base/hashdebug.go @@ -38,6 +38,7 @@ type HashDebug struct { // The default compiler-debugging HashDebug, for "-d=gossahash=..." var hashDebug *HashDebug +var FmaHash *HashDebug // DebugHashMatch reports whether debug variable Gossahash // diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules index 4dafef574e..727204d80a 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules @@ -2937,18 +2937,19 @@ (FNEGD (FNMULD x y)) => (FMULD x y) (FNMULS (FNEGS x) y) => (FMULS x y) (FNMULD (FNEGD x) y) => (FMULD x y) -(FADDS a (FMULS x y)) => (FMADDS a x y) -(FADDD a (FMULD x y)) => (FMADDD a x y) -(FSUBS a (FMULS x y)) => (FMSUBS a x y) -(FSUBD a (FMULD x y)) => (FMSUBD a x y) -(FSUBS (FMULS x y) a) => (FNMSUBS a x y) -(FSUBD (FMULD x y) a) => (FNMSUBD a x y) -(FADDS a (FNMULS x y)) => (FMSUBS a x y) -(FADDD a (FNMULD x y)) => (FMSUBD a x y) -(FSUBS a (FNMULS x y)) => (FMADDS a x y) -(FSUBD a (FNMULD x y)) => (FMADDD a x y) -(FSUBS (FNMULS x y) a) => (FNMADDS a x y) -(FSUBD (FNMULD x y) a) => (FNMADDD a x y) + +(FADDS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y) +(FADDD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y) +(FSUBS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y) +(FSUBD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y) +(FSUBS (FMULS x y) a) && a.Block.Func.useFMA(v) => (FNMSUBS a x y) +(FSUBD (FMULD x y) a) && a.Block.Func.useFMA(v) => (FNMSUBD a x y) +(FADDS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y) +(FADDD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y) +(FSUBS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y) +(FSUBD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y) +(FSUBS (FNMULS x y) a) && a.Block.Func.useFMA(v) => (FNMADDS a x y) +(FSUBD (FNMULD x y) a) && a.Block.Func.useFMA(v) => (FNMADDD a x y) (MOVBUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read8(sym, int64(off)))]) (MOVHUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))]) diff --git a/src/cmd/compile/internal/ssa/_gen/PPC64.rules b/src/cmd/compile/internal/ssa/_gen/PPC64.rules index 79e633e3e4..aee53d4f0f 100644 --- a/src/cmd/compile/internal/ssa/_gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/_gen/PPC64.rules @@ -942,8 +942,8 @@ (FNEG (F(ABS|NABS) x)) => (F(NABS|ABS) x) // floating-point fused multiply-add/sub -(F(ADD|SUB) (FMUL x y) z) => (FM(ADD|SUB) x y z) -(F(ADDS|SUBS) (FMULS x y) z) => (FM(ADDS|SUBS) x y z) +(F(ADD|SUB) (FMUL x y) z) && x.Block.Func.useFMA(v) => (FM(ADD|SUB) x y z) +(F(ADDS|SUBS) (FMULS x y) z) && x.Block.Func.useFMA(v) => (FM(ADDS|SUBS) x y z) // The following statements are found in encoding/binary functions UintXX (load) and PutUintXX (store) // and convert the statements in these functions from multiple single byte loads or stores to diff --git a/src/cmd/compile/internal/ssa/_gen/S390X.rules b/src/cmd/compile/internal/ssa/_gen/S390X.rules index 8c48d6f601..e9becb2e17 100644 --- a/src/cmd/compile/internal/ssa/_gen/S390X.rules +++ b/src/cmd/compile/internal/ssa/_gen/S390X.rules @@ -1254,8 +1254,8 @@ (C(G|LG)IJ {s390x.Greater} (NEG (Select0 (SUBE (MOVDconst [0]) (MOVDconst [0]) borrow))) [0]) => (BRC {s390x.Borrow} borrow) // fused multiply-add -(Select0 (F(ADD|SUB) (FMUL y z) x)) => (FM(ADD|SUB) x y z) -(Select0 (F(ADDS|SUBS) (FMULS y z) x)) => (FM(ADDS|SUBS) x y z) +(Select0 (F(ADD|SUB) (FMUL y z) x)) && x.Block.Func.useFMA(v) => (FM(ADD|SUB) x y z) +(Select0 (F(ADDS|SUBS) (FMULS y z) x)) && x.Block.Func.useFMA(v) => (FM(ADDS|SUBS) x y z) // Convert floating point comparisons against zero into 'load and test' instructions. (F(CMP|CMPS) x (FMOV(D|S)const [0.0])) => (LT(D|E)BR x) diff --git a/src/cmd/compile/internal/ssa/fmahash_test.go b/src/cmd/compile/internal/ssa/fmahash_test.go new file mode 100644 index 0000000000..78dd0baea2 --- /dev/null +++ b/src/cmd/compile/internal/ssa/fmahash_test.go @@ -0,0 +1,56 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa_test + +import ( + "internal/testenv" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "testing" +) + +// TestFmaHash checks that the hash-test machinery works properly for a single case. +// It does not check or run the generated code. +// The test file is however a useful example of fused-vs-cascaded multiply-add. +func TestFmaHash(t *testing.T) { + if testing.Short() { + t.Skip("Slow test, usually avoid it, testing.Short") + } + switch runtime.GOOS { + case "linux", "darwin": + default: + t.Skipf("Slow test, usually avoid it, os=%s not linux or darwin", runtime.GOOS) + } + switch runtime.GOARCH { + case "amd64", "arm64": + default: + t.Skipf("Slow test, usually avoid it, arch=%s not amd64 or arm64", runtime.GOARCH) + } + + testenv.MustHaveGoBuild(t) + gocmd := testenv.GoToolPath(t) + tmpdir, err := os.MkdirTemp("", "x") + if err != nil { + t.Error(err) + } + defer os.RemoveAll(tmpdir) + source := filepath.Join("testdata", "fma.go") + output := filepath.Join(tmpdir, "fma.exe") + cmd := exec.Command(gocmd, "build", "-o", output, source) + cmd.Env = append(cmd.Env, "GOCOMPILEDEBUG=fmahash=101111101101111001110110", "GOOS=linux", "GOARCH=arm64", "HOME="+tmpdir) + t.Logf("%v", cmd) + t.Logf("%v", cmd.Env) + b, e := cmd.CombinedOutput() + if e != nil { + t.Error(e) + } + s := string(b) // Looking for "GOFMAHASH triggered main.main:24" + if !strings.Contains(s, "fmahash triggered main.main:24") { + t.Errorf("Expected to see 'fmahash triggered main.main:24' in \n-----\n%s-----", s) + } +} diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go index d9a51ac424..18226c42b9 100644 --- a/src/cmd/compile/internal/ssa/func.go +++ b/src/cmd/compile/internal/ssa/func.go @@ -801,3 +801,17 @@ func (f *Func) spSb() (sp, sb *Value) { } return } + +// useFMA allows targeted debugging w/ GOFMAHASH +// If you have an architecture-dependent FP glitch, this will help you find it. +func (f *Func) useFMA(v *Value) bool { + if !f.Config.UseFMA { + return false + } + if base.FmaHash == nil { + return true + } + + name := f.fe.MyImportPath() + "." + f.Name + return base.FmaHash.DebugHashMatchParam(name, uint64(v.Pos.Line())) +} diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index e8b3aeb9cb..d7386729e7 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -4342,6 +4342,7 @@ func rewriteValueARM64_OpARM64FADDD(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (FADDD a (FMULD x y)) + // cond: a.Block.Func.useFMA(v) // result: (FMADDD a x y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { @@ -4351,6 +4352,9 @@ func rewriteValueARM64_OpARM64FADDD(v *Value) bool { } y := v_1.Args[1] x := v_1.Args[0] + if !(a.Block.Func.useFMA(v)) { + continue + } v.reset(OpARM64FMADDD) v.AddArg3(a, x, y) return true @@ -4358,6 +4362,7 @@ func rewriteValueARM64_OpARM64FADDD(v *Value) bool { break } // match: (FADDD a (FNMULD x y)) + // cond: a.Block.Func.useFMA(v) // result: (FMSUBD a x y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { @@ -4367,6 +4372,9 @@ func rewriteValueARM64_OpARM64FADDD(v *Value) bool { } y := v_1.Args[1] x := v_1.Args[0] + if !(a.Block.Func.useFMA(v)) { + continue + } v.reset(OpARM64FMSUBD) v.AddArg3(a, x, y) return true @@ -4379,6 +4387,7 @@ func rewriteValueARM64_OpARM64FADDS(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (FADDS a (FMULS x y)) + // cond: a.Block.Func.useFMA(v) // result: (FMADDS a x y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { @@ -4388,6 +4397,9 @@ func rewriteValueARM64_OpARM64FADDS(v *Value) bool { } y := v_1.Args[1] x := v_1.Args[0] + if !(a.Block.Func.useFMA(v)) { + continue + } v.reset(OpARM64FMADDS) v.AddArg3(a, x, y) return true @@ -4395,6 +4407,7 @@ func rewriteValueARM64_OpARM64FADDS(v *Value) bool { break } // match: (FADDS a (FNMULS x y)) + // cond: a.Block.Func.useFMA(v) // result: (FMSUBS a x y) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { @@ -4404,6 +4417,9 @@ func rewriteValueARM64_OpARM64FADDS(v *Value) bool { } y := v_1.Args[1] x := v_1.Args[0] + if !(a.Block.Func.useFMA(v)) { + continue + } v.reset(OpARM64FMSUBS) v.AddArg3(a, x, y) return true @@ -5458,6 +5474,7 @@ func rewriteValueARM64_OpARM64FSUBD(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (FSUBD a (FMULD x y)) + // cond: a.Block.Func.useFMA(v) // result: (FMSUBD a x y) for { a := v_0 @@ -5466,11 +5483,15 @@ func rewriteValueARM64_OpARM64FSUBD(v *Value) bool { } y := v_1.Args[1] x := v_1.Args[0] + if !(a.Block.Func.useFMA(v)) { + break + } v.reset(OpARM64FMSUBD) v.AddArg3(a, x, y) return true } // match: (FSUBD (FMULD x y) a) + // cond: a.Block.Func.useFMA(v) // result: (FNMSUBD a x y) for { if v_0.Op != OpARM64FMULD { @@ -5479,11 +5500,15 @@ func rewriteValueARM64_OpARM64FSUBD(v *Value) bool { y := v_0.Args[1] x := v_0.Args[0] a := v_1 + if !(a.Block.Func.useFMA(v)) { + break + } v.reset(OpARM64FNMSUBD) v.AddArg3(a, x, y) return true } // match: (FSUBD a (FNMULD x y)) + // cond: a.Block.Func.useFMA(v) // result: (FMADDD a x y) for { a := v_0 @@ -5492,11 +5517,15 @@ func rewriteValueARM64_OpARM64FSUBD(v *Value) bool { } y := v_1.Args[1] x := v_1.Args[0] + if !(a.Block.Func.useFMA(v)) { + break + } v.reset(OpARM64FMADDD) v.AddArg3(a, x, y) return true } // match: (FSUBD (FNMULD x y) a) + // cond: a.Block.Func.useFMA(v) // result: (FNMADDD a x y) for { if v_0.Op != OpARM64FNMULD { @@ -5505,6 +5534,9 @@ func rewriteValueARM64_OpARM64FSUBD(v *Value) bool { y := v_0.Args[1] x := v_0.Args[0] a := v_1 + if !(a.Block.Func.useFMA(v)) { + break + } v.reset(OpARM64FNMADDD) v.AddArg3(a, x, y) return true @@ -5515,6 +5547,7 @@ func rewriteValueARM64_OpARM64FSUBS(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (FSUBS a (FMULS x y)) + // cond: a.Block.Func.useFMA(v) // result: (FMSUBS a x y) for { a := v_0 @@ -5523,11 +5556,15 @@ func rewriteValueARM64_OpARM64FSUBS(v *Value) bool { } y := v_1.Args[1] x := v_1.Args[0] + if !(a.Block.Func.useFMA(v)) { + break + } v.reset(OpARM64FMSUBS) v.AddArg3(a, x, y) return true } // match: (FSUBS (FMULS x y) a) + // cond: a.Block.Func.useFMA(v) // result: (FNMSUBS a x y) for { if v_0.Op != OpARM64FMULS { @@ -5536,11 +5573,15 @@ func rewriteValueARM64_OpARM64FSUBS(v *Value) bool { y := v_0.Args[1] x := v_0.Args[0] a := v_1 + if !(a.Block.Func.useFMA(v)) { + break + } v.reset(OpARM64FNMSUBS) v.AddArg3(a, x, y) return true } // match: (FSUBS a (FNMULS x y)) + // cond: a.Block.Func.useFMA(v) // result: (FMADDS a x y) for { a := v_0 @@ -5549,11 +5590,15 @@ func rewriteValueARM64_OpARM64FSUBS(v *Value) bool { } y := v_1.Args[1] x := v_1.Args[0] + if !(a.Block.Func.useFMA(v)) { + break + } v.reset(OpARM64FMADDS) v.AddArg3(a, x, y) return true } // match: (FSUBS (FNMULS x y) a) + // cond: a.Block.Func.useFMA(v) // result: (FNMADDS a x y) for { if v_0.Op != OpARM64FNMULS { @@ -5562,6 +5607,9 @@ func rewriteValueARM64_OpARM64FSUBS(v *Value) bool { y := v_0.Args[1] x := v_0.Args[0] a := v_1 + if !(a.Block.Func.useFMA(v)) { + break + } v.reset(OpARM64FNMADDS) v.AddArg3(a, x, y) return true diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index d1bacf1bf4..8b5ea14757 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -4655,18 +4655,27 @@ func rewriteValuePPC64_OpPPC64FADD(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (FADD (FMUL x y) z) + // cond: x.Block.Func.useFMA(v) // result: (FMADD x y z) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { if v_0.Op != OpPPC64FMUL { continue } - y := v_0.Args[1] - x := v_0.Args[0] - z := v_1 - v.reset(OpPPC64FMADD) - v.AddArg3(x, y, z) - return true + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { + x := v_0_0 + y := v_0_1 + z := v_1 + if !(x.Block.Func.useFMA(v)) { + continue + } + v.reset(OpPPC64FMADD) + v.AddArg3(x, y, z) + return true + } } break } @@ -4676,18 +4685,27 @@ func rewriteValuePPC64_OpPPC64FADDS(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (FADDS (FMULS x y) z) + // cond: x.Block.Func.useFMA(v) // result: (FMADDS x y z) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { if v_0.Op != OpPPC64FMULS { continue } - y := v_0.Args[1] - x := v_0.Args[0] - z := v_1 - v.reset(OpPPC64FMADDS) - v.AddArg3(x, y, z) - return true + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 { + x := v_0_0 + y := v_0_1 + z := v_1 + if !(x.Block.Func.useFMA(v)) { + continue + } + v.reset(OpPPC64FMADDS) + v.AddArg3(x, y, z) + return true + } } break } @@ -5078,17 +5096,27 @@ func rewriteValuePPC64_OpPPC64FSUB(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (FSUB (FMUL x y) z) + // cond: x.Block.Func.useFMA(v) // result: (FMSUB x y z) for { if v_0.Op != OpPPC64FMUL { break } - y := v_0.Args[1] - x := v_0.Args[0] - z := v_1 - v.reset(OpPPC64FMSUB) - v.AddArg3(x, y, z) - return true + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + y := v_0_1 + z := v_1 + if !(x.Block.Func.useFMA(v)) { + continue + } + v.reset(OpPPC64FMSUB) + v.AddArg3(x, y, z) + return true + } + break } return false } @@ -5096,17 +5124,27 @@ func rewriteValuePPC64_OpPPC64FSUBS(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] // match: (FSUBS (FMULS x y) z) + // cond: x.Block.Func.useFMA(v) // result: (FMSUBS x y z) for { if v_0.Op != OpPPC64FMULS { break } - y := v_0.Args[1] - x := v_0.Args[0] - z := v_1 - v.reset(OpPPC64FMSUBS) - v.AddArg3(x, y, z) - return true + _ = v_0.Args[1] + v_0_0 := v_0.Args[0] + v_0_1 := v_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 { + x := v_0_0 + y := v_0_1 + z := v_1 + if !(x.Block.Func.useFMA(v)) { + continue + } + v.reset(OpPPC64FMSUBS) + v.AddArg3(x, y, z) + return true + } + break } return false } diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go index db1747689d..8f40ecdc81 100644 --- a/src/cmd/compile/internal/ssa/rewriteS390X.go +++ b/src/cmd/compile/internal/ssa/rewriteS390X.go @@ -15335,6 +15335,7 @@ func rewriteValueS390X_OpSelect0(v *Value) bool { return true } // match: (Select0 (FADD (FMUL y z) x)) + // cond: x.Block.Func.useFMA(v) // result: (FMADD x y z) for { if v_0.Op != OpS390XFADD { @@ -15350,6 +15351,9 @@ func rewriteValueS390X_OpSelect0(v *Value) bool { z := v_0_0.Args[1] y := v_0_0.Args[0] x := v_0_1 + if !(x.Block.Func.useFMA(v)) { + continue + } v.reset(OpS390XFMADD) v.AddArg3(x, y, z) return true @@ -15357,6 +15361,7 @@ func rewriteValueS390X_OpSelect0(v *Value) bool { break } // match: (Select0 (FSUB (FMUL y z) x)) + // cond: x.Block.Func.useFMA(v) // result: (FMSUB x y z) for { if v_0.Op != OpS390XFSUB { @@ -15369,11 +15374,15 @@ func rewriteValueS390X_OpSelect0(v *Value) bool { } z := v_0_0.Args[1] y := v_0_0.Args[0] + if !(x.Block.Func.useFMA(v)) { + break + } v.reset(OpS390XFMSUB) v.AddArg3(x, y, z) return true } // match: (Select0 (FADDS (FMULS y z) x)) + // cond: x.Block.Func.useFMA(v) // result: (FMADDS x y z) for { if v_0.Op != OpS390XFADDS { @@ -15389,6 +15398,9 @@ func rewriteValueS390X_OpSelect0(v *Value) bool { z := v_0_0.Args[1] y := v_0_0.Args[0] x := v_0_1 + if !(x.Block.Func.useFMA(v)) { + continue + } v.reset(OpS390XFMADDS) v.AddArg3(x, y, z) return true @@ -15396,6 +15408,7 @@ func rewriteValueS390X_OpSelect0(v *Value) bool { break } // match: (Select0 (FSUBS (FMULS y z) x)) + // cond: x.Block.Func.useFMA(v) // result: (FMSUBS x y z) for { if v_0.Op != OpS390XFSUBS { @@ -15408,6 +15421,9 @@ func rewriteValueS390X_OpSelect0(v *Value) bool { } z := v_0_0.Args[1] y := v_0_0.Args[0] + if !(x.Block.Func.useFMA(v)) { + break + } v.reset(OpS390XFMSUBS) v.AddArg3(x, y, z) return true diff --git a/src/cmd/compile/internal/ssa/testdata/fma.go b/src/cmd/compile/internal/ssa/testdata/fma.go new file mode 100644 index 0000000000..468448b9e6 --- /dev/null +++ b/src/cmd/compile/internal/ssa/testdata/fma.go @@ -0,0 +1,31 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "fmt" + "os" +) + +//go:noinline +func f(x float64) float64 { + return x +} + +func main() { + w, x, y := 1.0, 1.0, 1.0 + x = f(x + x/(1<<52)) + w = f(w / (1 << 27)) + y = f(y + y/(1<<52)) + w0 := f(2 * w * (1 - w)) + w1 := f(w * (1 + w)) + x = x + w0*w1 // GOFMAHASH=101111101101111001110110 + y = y + f(w0*w1) + fmt.Println(x, y, x-y) + + if x != y { + os.Exit(1) + } +}