From: Josh Bleecher Snyder
Date: Sun, 28 Oct 2018 19:01:11 +0000 (-0700)
Subject: cmd/compile: only optimize chained Moves on disjoint stack mem
X-Git-Tag: go1.12beta1~440
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=a98bb7e244954f1e035e9b9b6868a92cff04089c;p=gostls13.git

cmd/compile: only optimize chained Moves on disjoint stack mem

This optimization is not sound if A, B, or C might overlap
with each other.

Thanks to Michael Munday for pointing this out
during the review of CL 143479.

This reduces the number of times this optimization triggers
during make.bash from 386 to 74. This is unfortunate,
but I don't see an obvious way around it,
short of souping up the disjointness analysis.

name      old object-bytes  new object-bytes  delta
Template       507kB ± 0%        507kB ± 0%   +0.13%  (p=0.008 n=5+5)
Unicode        225kB ± 0%        225kB ± 0%     ~     (all equal)
GoTypes       1.85MB ± 0%       1.85MB ± 0%   +0.02%  (p=0.008 n=5+5)
Flate          328kB ± 0%        328kB ± 0%     ~     (all equal)
GoParser       402kB ± 0%        402kB ± 0%     ~     (all equal)
Reflect       1.41MB ± 0%       1.41MB ± 0%     ~     (all equal)
Tar            457kB ± 0%        458kB ± 0%   +0.20%  (p=0.008 n=5+5)
XML            600kB ± 0%        601kB ± 0%   +0.03%  (p=0.008 n=5+5)

Change-Id: Ida408cb627145ba9faf473a78606f050c2f3f51c
Reviewed-on: https://go-review.googlesource.com/c/145208
Run-TryBot: Josh Bleecher Snyder
TryBot-Result: Gobot Gobot
Reviewed-by: Michael Munday
---

diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules
index 14a67846dc..5f1ed83ad1 100644
--- a/src/cmd/compile/internal/ssa/gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/gen/generic.rules
@@ -1804,11 +1804,16 @@
 // Later passes (deadstore, elim unread auto) will remove the A -> B move, if possible.
 // This happens most commonly when B is an autotmp inserted earlier
 // during compilation to ensure correctness.
-(Move {t1} [s1] dst tmp1 midmem:(Move {t2} [s2] tmp2 src _))
-  && s1 == s2
+// Take care that overlapping moves are preserved.
+// Restrict this optimization to the stack, to avoid duplicating loads from the heap;
+// see CL 145208 for discussion.
+(Move {t1} [s] dst tmp1 midmem:(Move {t2} [s] tmp2 src _))
   && t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq
   && isSamePtr(tmp1, tmp2)
-  -> (Move {t1} [s1] dst src midmem)
+  && isStackPtr(src)
+  && disjoint(src, s, tmp2, s)
+  && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
+  -> (Move {t1} [s] dst src midmem)
 
 // Elide self-moves. This only happens rarely (e.g test/fixedbugs/bug277.go).
 // However, this rule is needed to prevent the previous rule from looping forever in such cases.
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 17d7cb3414..7ddf215478 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -530,6 +530,13 @@ func isSamePtr(p1, p2 *Value) bool {
 	return false
 }
 
+func isStackPtr(v *Value) bool {
+	for v.Op == OpOffPtr || v.Op == OpAddPtr {
+		v = v.Args[0]
+	}
+	return v.Op == OpSP || v.Op == OpLocalAddr
+}
+
 // disjoint reports whether the memory region specified by [p1:p1+n1)
 // does not overlap with [p2:p2+n2).
 // A return value of false does not imply the regions overlap.
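(Aside, not part of the CL: a minimal, self-contained Go sketch of why the elision needs the new disjointness conditions. The chained pattern copies C -> B -> A through a temporary; collapsing it to a direct C -> A copy is only safe when the regions cannot overlap, or when the copy is lowered to an overlap-safe memmove. The buffer, slice names, and the forward loop below are illustrative only and stand in for a lowered Move.)

package main

import "fmt"

func main() {
	// Naming as in the commit message: dst (A) <- tmp (B) <- src (C),
	// where dst and src overlap but tmp is disjoint from both.
	buf := []byte("abcdefgh")
	src := buf[0:4] // "abcd"
	dst := buf[2:6] // overlaps src

	// Copy through a disjoint temporary (what the unoptimized chain does).
	tmp := make([]byte, 4)
	copy(tmp, src)
	copy(dst, tmp)
	fmt.Println(string(buf)) // "ababcdgh"

	// Collapse the chain into a direct copy that is not overlap-safe
	// (a plain forward loop, standing in for a lowered Move).
	buf2 := []byte("abcdefgh")
	src2, dst2 := buf2[0:4], buf2[2:6]
	for i := range dst2 {
		dst2[i] = src2[i] // reads bytes it has already overwritten
	}
	fmt.Println(string(buf2)) // "abababgh" -- differs from the result above
}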
diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go
index 7869fec21f..22e28bed54 100644
--- a/src/cmd/compile/internal/ssa/rewritegeneric.go
+++ b/src/cmd/compile/internal/ssa/rewritegeneric.go
@@ -17234,6 +17234,8 @@ func rewriteValuegeneric_OpMove_10(v *Value) bool {
 func rewriteValuegeneric_OpMove_20(v *Value) bool {
 	b := v.Block
 	_ = b
+	config := b.Func.Config
+	_ = config
 	// match: (Move {t1} [n] dst p1 mem:(VarDef (Store {t2} (OffPtr [o2] p2) d1 (Store {t3} (OffPtr [o3] p3) d2 (Store {t4} (OffPtr [o4] p4) d3 (Store {t5} (OffPtr [o5] p5) d4 (Zero {t6} [n] p6 _)))))))
 	// cond: isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5) && isSamePtr(p5, p6) && alignof(t2) <= alignof(t1) && alignof(t3) <= alignof(t1) && alignof(t4) <= alignof(t1) && alignof(t5) <= alignof(t1) && alignof(t6) <= alignof(t1) && registerizable(b, t2) && registerizable(b, t3) && registerizable(b, t4) && registerizable(b, t5) && n >= o2 + sizeof(t2) && n >= o3 + sizeof(t3) && n >= o4 + sizeof(t4) && n >= o5 + sizeof(t5)
 	// result: (Store {t2} (OffPtr [o2] dst) d1 (Store {t3} (OffPtr [o3] dst) d2 (Store {t4} (OffPtr [o4] dst) d3 (Store {t5} (OffPtr [o5] dst) d4 (Zero {t1} [n] dst mem)))))
@@ -17355,11 +17357,11 @@ func rewriteValuegeneric_OpMove_20(v *Value) bool {
 		v.AddArg(v1)
 		return true
 	}
-	// match: (Move {t1} [s1] dst tmp1 midmem:(Move {t2} [s2] tmp2 src _))
-	// cond: s1 == s2 && t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2)
-	// result: (Move {t1} [s1] dst src midmem)
+	// match: (Move {t1} [s] dst tmp1 midmem:(Move {t2} [s] tmp2 src _))
+	// cond: t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
+	// result: (Move {t1} [s] dst src midmem)
 	for {
-		s1 := v.AuxInt
+		s := v.AuxInt
 		t1 := v.Aux
 		_ = v.Args[2]
 		dst := v.Args[0]
@@ -17368,16 +17370,18 @@ func rewriteValuegeneric_OpMove_20(v *Value) bool {
 		if midmem.Op != OpMove {
 			break
 		}
-		s2 := midmem.AuxInt
+		if midmem.AuxInt != s {
+			break
+		}
 		t2 := midmem.Aux
 		_ = midmem.Args[2]
 		tmp2 := midmem.Args[0]
 		src := midmem.Args[1]
-		if !(s1 == s2 && t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2)) {
+		if !(t1.(*types.Type).Compare(t2.(*types.Type)) == types.CMPeq && isSamePtr(tmp1, tmp2) && isStackPtr(src) && disjoint(src, s, tmp2, s) && (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))) {
 			break
 		}
 		v.reset(OpMove)
-		v.AuxInt = s1
+		v.AuxInt = s
 		v.Aux = t1
 		v.AddArg(dst)
 		v.AddArg(src)
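(Aside, not part of the CL: a hypothetical Go-level sketch of the kind of source that tends to produce the chained-Move pattern this rule matches. As the rules file notes, B is most commonly an autotmp inserted earlier to ensure correctness, e.g. when an assignment's source and destination might alias; the rewrite then elides the autotmp only under the new stack and disjointness conditions. The type and function names here are made up for illustration.)

// Hypothetical example: the compiler may copy *q into a stack autotmp
// first so the assignment stays correct if p and q alias, conceptually
// tmp = *q; *p = tmp, i.e. Move(A=dst, B=tmp, Move(B=tmp, C=src, _)).
package example

type T struct{ a, b, c, d int64 }

func assign(p, q *T) {
	*p = *q
}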