This adds a -d debug flag, "fmahash", for hashcode search when debugging
architecture-dependent floating-point problems. The flag has no
effect on architectures without fused multiply-add.
This was rebased onto the GOSSAHASH renovation so that this could have
its own dedicated environment variable, and so that it would be
cheap (a nil check) to check it in the normal case.
Includes a basic test of the trigger plumbing.
Sample use (on arm64, ppc64le, s390x):
% GOCOMPILEDEBUG=fmahash=001110110 \
go build -o foo cmd/compile/internal/ssa/testdata/fma.go
fmahash triggered main.main:24 101111101101111001110110
GOFMAHASH triggered main.main:20 010111010000101110111011
1.0000000000000002 1.0000000000000004 -2.220446049250313e-16
exit status 1
The intended use is in conjunction with github.com/dr2chase/gossahash,
which will probably acquire a flag "-fma" to streamline its use. This
tool+use was inspired by an ad hoc use of this technique "in anger"
to debug this very problem. This is also a dry-run for using this
same technique to identify code sensitive to loop variable
lifetime/capture, should we make that change.
Example intended use, with current search tool (using old environment
variable), for a test example:
gossahash -e GOFMAHASH GOMAGIC=GOFMAHASH go run fma.go
Trying go args=[...], env=[GOFMAHASH=1 GOMAGIC=GOFMAHASH]
go failed (81 distinct triggers): exit status 1
Trying go args=[...], env=[GOFMAHASH=11 GOMAGIC=GOFMAHASH]
go failed (39 distinct triggers): exit status 1
Trying go args=[...], env=[GOFMAHASH=011 GOMAGIC=GOFMAHASH]
go failed (18 distinct triggers): exit status 1
Trying go args=[...], env=[GOFMAHASH=0011 GOMAGIC=GOFMAHASH]
Trying go args=[...], env=[GOFMAHASH=1011 GOMAGIC=GOFMAHASH]
...
Trying go args=[...], env=[GOFMAHASH=0110111011 GOMAGIC=GOFMAHASH]
Trying go args=[...], env=[GOFMAHASH=1110111011 GOMAGIC=GOFMAHASH]
go failed (2 distinct triggers): exit status 1
Trigger string is 'GOFMAHASH triggered math.qzero:427 111111101010011110111011', repeated 6 times
Trigger string is 'GOFMAHASH triggered main.main:20 010111010000101110111011', repeated 1 times
Trying go args=[...], env=[GOFMAHASH=01110111011 GOMAGIC=GOFMAHASH]
go failed (1 distinct triggers): exit status 1
Trigger string is 'GOFMAHASH triggered main.main:20 010111010000101110111011', repeated 1 times
Review GSHS_LAST_FAIL.0.log for failing run
FINISHED, suggest this command line for debugging:
GOSSAFUNC='main.main:20 010111010000101110111011' \
GOFMAHASH=01110111011 GOMAGIC=GOFMAHASH go run fma.go
Change-Id: Ifa22dd8f1c37c18fc8a4f7c396345a364bc367d5
Reviewed-on: https://go-review.googlesource.com/c/go/+/394754
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
DumpPtrs int `help:"show Node pointers values in dump output"`
DwarfInl int `help:"print information about DWARF inlined function creation"`
Export int `help:"print export data"`
+ Fmahash string `help:"hash value for use in debugging platform-dependent multiply-add use" concurrent:"ok"`
GCProg int `help:"print dump of GC programs"`
Gossahash string `help:"hash value for use in debugging the compiler"`
InlFuncsWithClosures int `help:"allow functions with closures to be inlined"`
hashDebug = NewHashDebug("gosshash", Debug.Gossahash, nil)
}
+ if Debug.Fmahash != "" {
+ FmaHash = NewHashDebug("fmahash", Debug.Fmahash, nil)
+ }
+
if Flag.MSan && !platform.MSanSupported(buildcfg.GOOS, buildcfg.GOARCH) {
log.Fatalf("%s/%s does not support -msan", buildcfg.GOOS, buildcfg.GOARCH)
}
// The default compiler-debugging HashDebug, for "-d=gossahash=..."
var hashDebug *HashDebug
+// FmaHash is the HashDebug for "-d=fmahash=...".
+// It is nil unless that debug flag was given a non-empty value.
+var FmaHash *HashDebug
// DebugHashMatch reports whether debug variable Gossahash
//
(FNEGD (FNMULD x y)) => (FMULD x y)
(FNMULS (FNEGS x) y) => (FMULS x y)
(FNMULD (FNEGD x) y) => (FMULD x y)
-(FADDS a (FMULS x y)) => (FMADDS a x y)
-(FADDD a (FMULD x y)) => (FMADDD a x y)
-(FSUBS a (FMULS x y)) => (FMSUBS a x y)
-(FSUBD a (FMULD x y)) => (FMSUBD a x y)
-(FSUBS (FMULS x y) a) => (FNMSUBS a x y)
-(FSUBD (FMULD x y) a) => (FNMSUBD a x y)
-(FADDS a (FNMULS x y)) => (FMSUBS a x y)
-(FADDD a (FNMULD x y)) => (FMSUBD a x y)
-(FSUBS a (FNMULS x y)) => (FMADDS a x y)
-(FSUBD a (FNMULD x y)) => (FMADDD a x y)
-(FSUBS (FNMULS x y) a) => (FNMADDS a x y)
-(FSUBD (FNMULD x y) a) => (FNMADDD a x y)
+
+(FADDS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y)
+(FADDD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y)
+(FSUBS a (FMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y)
+(FSUBD a (FMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y)
+(FSUBS (FMULS x y) a) && a.Block.Func.useFMA(v) => (FNMSUBS a x y)
+(FSUBD (FMULD x y) a) && a.Block.Func.useFMA(v) => (FNMSUBD a x y)
+(FADDS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMSUBS a x y)
+(FADDD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMSUBD a x y)
+(FSUBS a (FNMULS x y)) && a.Block.Func.useFMA(v) => (FMADDS a x y)
+(FSUBD a (FNMULD x y)) && a.Block.Func.useFMA(v) => (FMADDD a x y)
+(FSUBS (FNMULS x y) a) && a.Block.Func.useFMA(v) => (FNMADDS a x y)
+(FSUBD (FNMULD x y) a) && a.Block.Func.useFMA(v) => (FNMADDD a x y)
(MOVBUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read8(sym, int64(off)))])
(MOVHUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVDconst [int64(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))])
(FNEG (F(ABS|NABS) x)) => (F(NABS|ABS) x)
// floating-point fused multiply-add/sub
-(F(ADD|SUB) (FMUL x y) z) => (FM(ADD|SUB) x y z)
-(F(ADDS|SUBS) (FMULS x y) z) => (FM(ADDS|SUBS) x y z)
+(F(ADD|SUB) (FMUL x y) z) && x.Block.Func.useFMA(v) => (FM(ADD|SUB) x y z)
+(F(ADDS|SUBS) (FMULS x y) z) && x.Block.Func.useFMA(v) => (FM(ADDS|SUBS) x y z)
// The following statements are found in encoding/binary functions UintXX (load) and PutUintXX (store)
// and convert the statements in these functions from multiple single byte loads or stores to
(C(G|LG)IJ {s390x.Greater} (NEG (Select0 (SUBE (MOVDconst [0]) (MOVDconst [0]) borrow))) [0]) => (BRC {s390x.Borrow} borrow)
// fused multiply-add
-(Select0 (F(ADD|SUB) (FMUL y z) x)) => (FM(ADD|SUB) x y z)
-(Select0 (F(ADDS|SUBS) (FMULS y z) x)) => (FM(ADDS|SUBS) x y z)
+(Select0 (F(ADD|SUB) (FMUL y z) x)) && x.Block.Func.useFMA(v) => (FM(ADD|SUB) x y z)
+(Select0 (F(ADDS|SUBS) (FMULS y z) x)) && x.Block.Func.useFMA(v) => (FM(ADDS|SUBS) x y z)
// Convert floating point comparisons against zero into 'load and test' instructions.
(F(CMP|CMPS) x (FMOV(D|S)const [0.0])) => (LT(D|E)BR x)
--- /dev/null
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa_test
+
+import (
+ "internal/testenv"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "runtime"
+ "strings"
+ "testing"
+)
+
+// TestFmaHash checks that the hash-test machinery works properly for a single case.
+// It does not check or run the generated code.
+// The test file is however a useful example of fused-vs-cascaded multiply-add.
+//
+// The test cross-compiles testdata/fma.go for linux/arm64 with
+// GOCOMPILEDEBUG=fmahash=... set and passes if the build output contains
+// the expected "fmahash triggered" line.
+func TestFmaHash(t *testing.T) {
+	if testing.Short() {
+		t.Skip("Slow test, usually avoid it, testing.Short")
+	}
+	switch runtime.GOOS {
+	case "linux", "darwin":
+	default:
+		t.Skipf("Slow test, usually avoid it, os=%s not linux or darwin", runtime.GOOS)
+	}
+	switch runtime.GOARCH {
+	case "amd64", "arm64":
+	default:
+		t.Skipf("Slow test, usually avoid it, arch=%s not amd64 or arm64", runtime.GOARCH)
+	}
+
+	testenv.MustHaveGoBuild(t)
+	gocmd := testenv.GoToolPath(t)
+	tmpdir, err := os.MkdirTemp("", "x")
+	if err != nil {
+		// Without a scratch directory nothing below is meaningful: tmpdir
+		// would be "", which would also end up in HOME and the output path.
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(tmpdir)
+	source := filepath.Join("testdata", "fma.go")
+	output := filepath.Join(tmpdir, "fma.exe")
+	cmd := exec.Command(gocmd, "build", "-o", output, source)
+	// cmd.Env starts out nil, so after this append the child process sees
+	// exactly these variables and nothing inherited from the test's environment.
+	cmd.Env = append(cmd.Env, "GOCOMPILEDEBUG=fmahash=101111101101111001110110", "GOOS=linux", "GOARCH=arm64", "HOME="+tmpdir)
+	t.Logf("%v", cmd)
+	t.Logf("%v", cmd.Env)
+	b, e := cmd.CombinedOutput()
+	if e != nil {
+		// Deliberately not fatal: the check below prints the captured
+		// build output, which is what explains the failure.
+		t.Error(e)
+	}
+	s := string(b) // Looking for "fmahash triggered main.main:24"
+	if !strings.Contains(s, "fmahash triggered main.main:24") {
+		t.Errorf("Expected to see 'fmahash triggered main.main:24' in \n-----\n%s-----", s)
+	}
+}
}
return
}
+
+// useFMA reports whether the rewrite rules may turn the multiply/add (or
+// multiply/subtract) rooted at v into a fused multiply-add.
+//
+// It is false whenever f.Config.UseFMA is false and true whenever no
+// "-d=fmahash=..." value was supplied (base.FmaHash is nil). Otherwise the
+// decision is delegated to base.FmaHash, keyed by the import-path-qualified
+// function name and the source line of v, which allows a hash search to
+// narrow an architecture-dependent floating-point difference down to a
+// single fusion site.
+func (f *Func) useFMA(v *Value) bool {
+	switch {
+	case !f.Config.UseFMA:
+		return false
+	case base.FmaHash == nil:
+		return true
+	}
+
+	pkgFunc := f.fe.MyImportPath() + "." + f.Name
+	line := uint64(v.Pos.Line())
+	return base.FmaHash.DebugHashMatchParam(pkgFunc, line)
+}
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (FADDD a (FMULD x y))
+ // cond: a.Block.Func.useFMA(v)
// result: (FMADDD a x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
}
y := v_1.Args[1]
x := v_1.Args[0]
+ if !(a.Block.Func.useFMA(v)) {
+ continue
+ }
v.reset(OpARM64FMADDD)
v.AddArg3(a, x, y)
return true
break
}
// match: (FADDD a (FNMULD x y))
+ // cond: a.Block.Func.useFMA(v)
// result: (FMSUBD a x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
}
y := v_1.Args[1]
x := v_1.Args[0]
+ if !(a.Block.Func.useFMA(v)) {
+ continue
+ }
v.reset(OpARM64FMSUBD)
v.AddArg3(a, x, y)
return true
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (FADDS a (FMULS x y))
+ // cond: a.Block.Func.useFMA(v)
// result: (FMADDS a x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
}
y := v_1.Args[1]
x := v_1.Args[0]
+ if !(a.Block.Func.useFMA(v)) {
+ continue
+ }
v.reset(OpARM64FMADDS)
v.AddArg3(a, x, y)
return true
break
}
// match: (FADDS a (FNMULS x y))
+ // cond: a.Block.Func.useFMA(v)
// result: (FMSUBS a x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
}
y := v_1.Args[1]
x := v_1.Args[0]
+ if !(a.Block.Func.useFMA(v)) {
+ continue
+ }
v.reset(OpARM64FMSUBS)
v.AddArg3(a, x, y)
return true
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (FSUBD a (FMULD x y))
+ // cond: a.Block.Func.useFMA(v)
// result: (FMSUBD a x y)
for {
a := v_0
}
y := v_1.Args[1]
x := v_1.Args[0]
+ if !(a.Block.Func.useFMA(v)) {
+ break
+ }
v.reset(OpARM64FMSUBD)
v.AddArg3(a, x, y)
return true
}
// match: (FSUBD (FMULD x y) a)
+ // cond: a.Block.Func.useFMA(v)
// result: (FNMSUBD a x y)
for {
if v_0.Op != OpARM64FMULD {
y := v_0.Args[1]
x := v_0.Args[0]
a := v_1
+ if !(a.Block.Func.useFMA(v)) {
+ break
+ }
v.reset(OpARM64FNMSUBD)
v.AddArg3(a, x, y)
return true
}
// match: (FSUBD a (FNMULD x y))
+ // cond: a.Block.Func.useFMA(v)
// result: (FMADDD a x y)
for {
a := v_0
}
y := v_1.Args[1]
x := v_1.Args[0]
+ if !(a.Block.Func.useFMA(v)) {
+ break
+ }
v.reset(OpARM64FMADDD)
v.AddArg3(a, x, y)
return true
}
// match: (FSUBD (FNMULD x y) a)
+ // cond: a.Block.Func.useFMA(v)
// result: (FNMADDD a x y)
for {
if v_0.Op != OpARM64FNMULD {
y := v_0.Args[1]
x := v_0.Args[0]
a := v_1
+ if !(a.Block.Func.useFMA(v)) {
+ break
+ }
v.reset(OpARM64FNMADDD)
v.AddArg3(a, x, y)
return true
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (FSUBS a (FMULS x y))
+ // cond: a.Block.Func.useFMA(v)
// result: (FMSUBS a x y)
for {
a := v_0
}
y := v_1.Args[1]
x := v_1.Args[0]
+ if !(a.Block.Func.useFMA(v)) {
+ break
+ }
v.reset(OpARM64FMSUBS)
v.AddArg3(a, x, y)
return true
}
// match: (FSUBS (FMULS x y) a)
+ // cond: a.Block.Func.useFMA(v)
// result: (FNMSUBS a x y)
for {
if v_0.Op != OpARM64FMULS {
y := v_0.Args[1]
x := v_0.Args[0]
a := v_1
+ if !(a.Block.Func.useFMA(v)) {
+ break
+ }
v.reset(OpARM64FNMSUBS)
v.AddArg3(a, x, y)
return true
}
// match: (FSUBS a (FNMULS x y))
+ // cond: a.Block.Func.useFMA(v)
// result: (FMADDS a x y)
for {
a := v_0
}
y := v_1.Args[1]
x := v_1.Args[0]
+ if !(a.Block.Func.useFMA(v)) {
+ break
+ }
v.reset(OpARM64FMADDS)
v.AddArg3(a, x, y)
return true
}
// match: (FSUBS (FNMULS x y) a)
+ // cond: a.Block.Func.useFMA(v)
// result: (FNMADDS a x y)
for {
if v_0.Op != OpARM64FNMULS {
y := v_0.Args[1]
x := v_0.Args[0]
a := v_1
+ if !(a.Block.Func.useFMA(v)) {
+ break
+ }
v.reset(OpARM64FNMADDS)
v.AddArg3(a, x, y)
return true
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (FADD (FMUL x y) z)
+ // cond: x.Block.Func.useFMA(v)
// result: (FMADD x y z)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpPPC64FMUL {
continue
}
- y := v_0.Args[1]
- x := v_0.Args[0]
- z := v_1
- v.reset(OpPPC64FMADD)
- v.AddArg3(x, y, z)
- return true
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
+ x := v_0_0
+ y := v_0_1
+ z := v_1
+ if !(x.Block.Func.useFMA(v)) {
+ continue
+ }
+ v.reset(OpPPC64FMADD)
+ v.AddArg3(x, y, z)
+ return true
+ }
}
break
}
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (FADDS (FMULS x y) z)
+ // cond: x.Block.Func.useFMA(v)
// result: (FMADDS x y z)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpPPC64FMULS {
continue
}
- y := v_0.Args[1]
- x := v_0.Args[0]
- z := v_1
- v.reset(OpPPC64FMADDS)
- v.AddArg3(x, y, z)
- return true
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
+ x := v_0_0
+ y := v_0_1
+ z := v_1
+ if !(x.Block.Func.useFMA(v)) {
+ continue
+ }
+ v.reset(OpPPC64FMADDS)
+ v.AddArg3(x, y, z)
+ return true
+ }
}
break
}
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (FSUB (FMUL x y) z)
+ // cond: x.Block.Func.useFMA(v)
// result: (FMSUB x y z)
for {
if v_0.Op != OpPPC64FMUL {
break
}
- y := v_0.Args[1]
- x := v_0.Args[0]
- z := v_1
- v.reset(OpPPC64FMSUB)
- v.AddArg3(x, y, z)
- return true
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ y := v_0_1
+ z := v_1
+ if !(x.Block.Func.useFMA(v)) {
+ continue
+ }
+ v.reset(OpPPC64FMSUB)
+ v.AddArg3(x, y, z)
+ return true
+ }
+ break
}
return false
}
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (FSUBS (FMULS x y) z)
+ // cond: x.Block.Func.useFMA(v)
// result: (FMSUBS x y z)
for {
if v_0.Op != OpPPC64FMULS {
break
}
- y := v_0.Args[1]
- x := v_0.Args[0]
- z := v_1
- v.reset(OpPPC64FMSUBS)
- v.AddArg3(x, y, z)
- return true
+ _ = v_0.Args[1]
+ v_0_0 := v_0.Args[0]
+ v_0_1 := v_0.Args[1]
+ for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
+ x := v_0_0
+ y := v_0_1
+ z := v_1
+ if !(x.Block.Func.useFMA(v)) {
+ continue
+ }
+ v.reset(OpPPC64FMSUBS)
+ v.AddArg3(x, y, z)
+ return true
+ }
+ break
}
return false
}
return true
}
// match: (Select0 (FADD (FMUL y z) x))
+ // cond: x.Block.Func.useFMA(v)
// result: (FMADD x y z)
for {
if v_0.Op != OpS390XFADD {
z := v_0_0.Args[1]
y := v_0_0.Args[0]
x := v_0_1
+ if !(x.Block.Func.useFMA(v)) {
+ continue
+ }
v.reset(OpS390XFMADD)
v.AddArg3(x, y, z)
return true
break
}
// match: (Select0 (FSUB (FMUL y z) x))
+ // cond: x.Block.Func.useFMA(v)
// result: (FMSUB x y z)
for {
if v_0.Op != OpS390XFSUB {
}
z := v_0_0.Args[1]
y := v_0_0.Args[0]
+ if !(x.Block.Func.useFMA(v)) {
+ break
+ }
v.reset(OpS390XFMSUB)
v.AddArg3(x, y, z)
return true
}
// match: (Select0 (FADDS (FMULS y z) x))
+ // cond: x.Block.Func.useFMA(v)
// result: (FMADDS x y z)
for {
if v_0.Op != OpS390XFADDS {
z := v_0_0.Args[1]
y := v_0_0.Args[0]
x := v_0_1
+ if !(x.Block.Func.useFMA(v)) {
+ continue
+ }
v.reset(OpS390XFMADDS)
v.AddArg3(x, y, z)
return true
break
}
// match: (Select0 (FSUBS (FMULS y z) x))
+ // cond: x.Block.Func.useFMA(v)
// result: (FMSUBS x y z)
for {
if v_0.Op != OpS390XFSUBS {
}
z := v_0_0.Args[1]
y := v_0_0.Args[0]
+ if !(x.Block.Func.useFMA(v)) {
+ break
+ }
v.reset(OpS390XFMSUBS)
v.AddArg3(x, y, z)
return true
--- /dev/null
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "fmt"
+ "os"
+)
+
+//go:noinline
+func f(x float64) float64 {
+	return x // returns its argument unchanged; the go:noinline directive keeps calls to f from being inlined
+}
+
+func main() {
+	w, x, y := 1.0, 1.0, 1.0
+	x = f(x + x/(1<<52))
+	w = f(w / (1 << 27))
+	y = f(y + y/(1<<52))
+	w0 := f(2 * w * (1 - w))
+	w1 := f(w * (1 + w))
+	x = x + w0*w1 // FMA candidate; GOCOMPILEDEBUG=fmahash=101111101101111001110110
+	y = y + f(w0*w1)
+	// x and y are built from the same inputs. The only difference is that the
+	// product added into y is first passed through the non-inlined f, while the
+	// product added into x feeds the add directly (the multiply-add shape the
+	// FMA rewrite rules match).
+	//
+	// NOTE: all comments in this function sit at or below the x statement on
+	// purpose. TestFmaHash expects "fmahash triggered main.main:24", so
+	// inserting or deleting lines above that statement changes the reported
+	// line (assuming this file is the testdata/fma.go that test builds).
+	fmt.Println(x, y, x-y)
+
+	// A nonzero exit status reports that the two sums differ.
+	if x != y {
+		os.Exit(1)
+	}
+}