From 8607b2e825da5bbd91929080ccfdbc20ed9aef96 Mon Sep 17 00:00:00 2001
From: Josh Bleecher Snyder
Date: Sat, 20 Oct 2018 19:59:27 -0700
Subject: [PATCH] cmd/compile: optimize A->B->C Moves that include VarDefs
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

We have an existing optimization that recognizes memory moves of the
form A -> B -> C and converts them into A -> C, in the hopes that the
store to B will end up being dead and thus eliminated.

However, when A, B, and C are large types, the front end sometimes
emits VarDef ops for the moves. This change adds an optimization to
match that pattern.

This required changing an old compiler test. The test assumed that a
temporary was required to deal with a large return value. With this
optimization in place, that temporary ended up being eliminated.

Triggers 649 times during 'go build -a std cmd'.

Cuts 16k off cmd/go.

name      old object-bytes  new object-bytes  delta
Template        507kB ± 0%        507kB ± 0%  -0.15%  (p=0.008 n=5+5)
Unicode         225kB ± 0%        225kB ± 0%    ~     (all equal)
GoTypes        1.85MB ± 0%       1.85MB ± 0%    ~     (all equal)
Flate           328kB ± 0%        328kB ± 0%    ~     (all equal)
GoParser        402kB ± 0%        402kB ± 0%  -0.00%  (p=0.008 n=5+5)
Reflect        1.41MB ± 0%       1.41MB ± 0%  -0.20%  (p=0.008 n=5+5)
Tar             458kB ± 0%        458kB ± 0%    ~     (all equal)
XML             601kB ± 0%        599kB ± 0%  -0.21%  (p=0.008 n=5+5)

Change-Id: I9b5f25c8663a0b772ad1ee51fa61f74b74d26dd3
Reviewed-on: https://go-review.googlesource.com/c/143479
Run-TryBot: Josh Bleecher Snyder
TryBot-Result: Gobot Gobot
Reviewed-by: Michael Munday
---
 .../compile/internal/ssa/gen/generic.rules    |  9 +++++
 .../compile/internal/ssa/rewritegeneric.go    | 35 +++++++++++++++++++
 test/fixedbugs/issue20780.go                  | 15 ++++----
 3 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules
index 5f1ed83ad1..5a1bee0fa2 100644
--- a/src/cmd/compile/internal/ssa/gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/gen/generic.rules
@@ -1815,6 +1815,15 @@
 	&& (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
 	-> (Move {t1} [s] dst src midmem)
 
+// Same, but for large types that require VarDefs.
+(Move {t1} [s] dst tmp1 midmem:(VarDef (Move {t2} [s] tmp2 src _)))
+	&& t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq
+	&& isSamePtr(tmp1, tmp2)
+	&& isStackPtr(src)
+	&& disjoint(src, s, tmp2, s)
+	&& (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
+	-> (Move {t1} [s] dst src midmem)
+
 // Elide self-moves. This only happens rarely (e.g test/fixedbugs/bug277.go).
 // However, this rule is needed to prevent the previous rule from looping forever in such cases.
 (Move dst src mem) && isSamePtr(dst, src) -> mem
diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go
index 22e28bed54..f16b571b2a 100644
--- a/src/cmd/compile/internal/ssa/rewritegeneric.go
+++ b/src/cmd/compile/internal/ssa/rewritegeneric.go
@@ -17388,6 +17388,41 @@ func rewriteValuegeneric_OpMove_20(v *Value) bool {
 		v.AddArg(midmem)
 		return true
 	}
+	// match: (Move {t1} [s] dst tmp1 midmem:(VarDef (Move {t2} [s] tmp2 src _)))
+	// cond: t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
+	// result: (Move {t1} [s] dst src midmem)
+	for {
+		s := v.AuxInt
+		t1 := v.Aux
+		_ = v.Args[2]
+		dst := v.Args[0]
+		tmp1 := v.Args[1]
+		midmem := v.Args[2]
+		if midmem.Op != OpVarDef {
+			break
+		}
+		midmem_0 := midmem.Args[0]
+		if midmem_0.Op != OpMove {
+			break
+		}
+		if midmem_0.AuxInt != s {
+			break
+		}
+		t2 := midmem_0.Aux
+		_ = midmem_0.Args[2]
+		tmp2 := midmem_0.Args[0]
+		src := midmem_0.Args[1]
+		if !(t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))) {
+			break
+		}
+		v.reset(OpMove)
+		v.AuxInt = s
+		v.Aux = t1
+		v.AddArg(dst)
+		v.AddArg(src)
+		v.AddArg(midmem)
+		return true
+	}
 	// match: (Move dst src mem)
 	// cond: isSamePtr(dst, src)
 	// result: mem
diff --git a/test/fixedbugs/issue20780.go b/test/fixedbugs/issue20780.go
index a31e031b78..58952e53ee 100644
--- a/test/fixedbugs/issue20780.go
+++ b/test/fixedbugs/issue20780.go
@@ -6,15 +6,14 @@
 
 // We have a limit of 1GB for stack frames.
 // Make sure we include the callee args section.
-// (The dispatch wrapper which implements (*S).f
-// copies the return value from f to a stack temp, then
-// from that stack temp to the return value of (*S).f.
-// It uses ~800MB for each section.)
 
 package main
 
-type S struct {
-	i interface {
-		f() [800e6]byte
-	}
+func f() { // ERROR "stack frame too large"
+	var x [800e6]byte
+	g(x)
+	return
 }
+
+//go:noinline
+func g([800e6]byte) {}
-- 
2.50.0
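
Illustration (not part of the patch): a minimal Go sketch of the kind of source pattern the new rule targets, under the assumption that a by-value copy of a large type is routed through a compiler temporary. The type Big and the functions makeBig/main are hypothetical names chosen for the example; the exact SSA lowering depends on the compiler version.

package main

// Big is a hypothetical large value type; copying it by value is
// lowered to SSA Move ops rather than individual loads and stores.
type Big struct {
	data [4096]int64
}

//go:noinline
func makeBig() Big {
	var b Big
	b.data[0] = 1
	return b
}

func main() {
	// The front end may route the call's large result through an
	// autotmp: VarDef tmp; Move tmp <- result; Move x <- tmp.
	// The rule added above rewrites the outer Move to copy straight
	// from the result slot, so the store to tmp can become dead and
	// be removed by later passes.
	x := makeBig()
	println(x.data[0])
}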