From: Michael Munday
Date: Wed, 29 Mar 2017 20:37:12 +0000 (-0400)
Subject: cmd/compile: add generic rules to eliminate some unnecessary stores
X-Git-Tag: go1.9beta1~286
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=4fc498d89a1e7cef854ed95c00ce7fed817e75a4;p=gostls13.git

cmd/compile: add generic rules to eliminate some unnecessary stores

Eliminates stores of values that have just been loaded from the same
location. Handles the common case where there are up to 3 intermediate
stores to non-overlapping struct fields.

For example the loads and stores of x.a, x.b and x.d in the following
function are now removed:

type T struct {
	a, b, c, d int
}

func f(x *T) {
	y := *x
	y.c += 8
	*x = y
}

Before this CL (s390x):

TEXT	"".f(SB)
	MOVD	"".x(R15), R5
	MOVD	(R5), R1
	MOVD	8(R5), R2
	MOVD	16(R5), R0
	MOVD	24(R5), R4
	ADD	$8, R0, R3
	STMG	R1, R4, (R5)
	RET

After this CL (s390x):

TEXT	"".f(SB)
	MOVD	"".x(R15), R1
	MOVD	16(R1), R0
	ADD	$8, R0, R0
	MOVD	R0, 16(R1)
	RET

In total these rules are triggered ~5091 times during all.bash, which
is broken down as:

Intermediate stores | Triggered
--------------------+----------
                  0 |      1434
                  1 |      2508
                  2 |       888
                  3 |       261
--------------------+----------

Change-Id: Ia4721ae40146aceec1fdd3e65b0e9283770bfba5
Reviewed-on: https://go-review.googlesource.com/38793
Run-TryBot: Michael Munday
TryBot-Result: Gobot Gobot
Reviewed-by: Keith Randall
---
diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules
index 322de4de11..b8d7381420 100644
--- a/src/cmd/compile/internal/ssa/gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/gen/generic.rules
@@ -691,6 +691,33 @@
 // Load of store of same address, with compatibly typed value and same size
 (Load <t1> p1 (Store {t2} p2 x _)) && isSamePtr(p1,p2) && t1.Compare(x.Type) == types.CMPeq && t1.Size() == t2.(*types.Type).Size() -> x
 
+// Eliminate stores of values that have just been loaded from the same location.
+// We also handle the common case where there are some intermediate stores to non-overlapping struct fields.
+(Store {t1} p1 (Load <t2> p2 mem) mem) &&
+	isSamePtr(p1, p2) &&
+	t2.Size() == t1.(*types.Type).Size() -> mem
+(Store {t1} (OffPtr [o1] p1) (Load <t2> (OffPtr [o1] p2) oldmem) mem:(Store {t3} (OffPtr [o3] p3) _ oldmem)) &&
+	isSamePtr(p1, p2) &&
+	isSamePtr(p1, p3) &&
+	t2.Size() == t1.(*types.Type).Size() &&
+	!overlap(o1, t2.Size(), o3, t3.(*types.Type).Size()) -> mem
+(Store {t1} (OffPtr [o1] p1) (Load <t2> (OffPtr [o1] p2) oldmem) mem:(Store {t3} (OffPtr [o3] p3) _ (Store {t4} (OffPtr [o4] p4) _ oldmem))) &&
+	isSamePtr(p1, p2) &&
+	isSamePtr(p1, p3) &&
+	isSamePtr(p1, p4) &&
+	t2.Size() == t1.(*types.Type).Size() &&
+	!overlap(o1, t2.Size(), o3, t3.(*types.Type).Size()) &&
+	!overlap(o1, t2.Size(), o4, t4.(*types.Type).Size()) -> mem
+(Store {t1} (OffPtr [o1] p1) (Load <t2> (OffPtr [o1] p2) oldmem) mem:(Store {t3} (OffPtr [o3] p3) _ (Store {t4} (OffPtr [o4] p4) _ (Store {t5} (OffPtr [o5] p5) _ oldmem)))) &&
+	isSamePtr(p1, p2) &&
+	isSamePtr(p1, p3) &&
+	isSamePtr(p1, p4) &&
+	isSamePtr(p1, p5) &&
+	t2.Size() == t1.(*types.Type).Size() &&
+	!overlap(o1, t2.Size(), o3, t3.(*types.Type).Size()) &&
+	!overlap(o1, t2.Size(), o4, t4.(*types.Type).Size()) &&
+	!overlap(o1, t2.Size(), o5, t5.(*types.Type).Size()) -> mem
+
 // Collapse OffPtr
 (OffPtr (OffPtr p [b]) [a]) -> (OffPtr p [a+b])
 (OffPtr p [0]) && v.Type.Compare(p.Type) == types.CMPeq -> p
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index f69ffc8c5c..06595586c1 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -628,3 +628,15 @@ func isARMImmRot(v uint32) bool {
 	return false
 }
+
+// overlap reports whether the ranges specified by the given offset and
+// size pairs overlap.
+func overlap(offset1, size1, offset2, size2 int64) bool {
+	if offset1 >= offset2 && offset2+size2 > offset1 {
+		return true
+	}
+	if offset2 >= offset1 && offset1+size1 > offset2 {
+		return true
+	}
+	return false
+}
diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go
index f41d3fa151..9f67d00404 100644
--- a/src/cmd/compile/internal/ssa/rewritegeneric.go
+++ b/src/cmd/compile/internal/ssa/rewritegeneric.go
@@ -20447,10 +20447,206 @@ func rewriteValuegeneric_OpSqrt_0(v *Value) bool {
 func rewriteValuegeneric_OpStore_0(v *Value) bool {
 	b := v.Block
 	_ = b
-	config := b.Func.Config
-	_ = config
 	fe := b.Func.fe
 	_ = fe
+	// match: (Store {t1} p1 (Load <t2> p2 mem) mem)
+	// cond: isSamePtr(p1, p2) && t2.Size() == t1.(*types.Type).Size()
+	// result: mem
+	for {
+		t1 := v.Aux
+		p1 := v.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpLoad {
+			break
+		}
+		t2 := v_1.Type
+		p2 := v_1.Args[0]
+		mem := v_1.Args[1]
+		if mem != v.Args[2] {
+			break
+		}
+		if !(isSamePtr(p1, p2) && t2.Size() == t1.(*types.Type).Size()) {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = mem.Type
+		v.AddArg(mem)
+		return true
+	}
+	// match: (Store {t1} (OffPtr [o1] p1) (Load <t2> (OffPtr [o1] p2) oldmem) mem:(Store {t3} (OffPtr [o3] p3) _ oldmem))
+	// cond: isSamePtr(p1, p2) && isSamePtr(p1, p3) && t2.Size() == t1.(*types.Type).Size() && !overlap(o1, t2.Size(), o3, t3.(*types.Type).Size())
+	// result: mem
+	for {
+		t1 := v.Aux
+		v_0 := v.Args[0]
+		if v_0.Op != OpOffPtr {
+			break
+		}
+		o1 := v_0.AuxInt
+		p1 := v_0.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpLoad {
+			break
+		}
+		t2 := v_1.Type
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpOffPtr {
+			break
+		}
+		if v_1_0.AuxInt != o1 {
+			break
+		}
+		p2 := v_1_0.Args[0]
+		oldmem := v_1.Args[1]
+		mem := v.Args[2]
+		if mem.Op != OpStore {
break + } + t3 := mem.Aux + mem_0 := mem.Args[0] + if mem_0.Op != OpOffPtr { + break + } + o3 := mem_0.AuxInt + p3 := mem_0.Args[0] + if oldmem != mem.Args[2] { + break + } + if !(isSamePtr(p1, p2) && isSamePtr(p1, p3) && t2.Size() == t1.(*types.Type).Size() && !overlap(o1, t2.Size(), o3, t3.(*types.Type).Size())) { + break + } + v.reset(OpCopy) + v.Type = mem.Type + v.AddArg(mem) + return true + } + // match: (Store {t1} (OffPtr [o1] p1) (Load (OffPtr [o1] p2) oldmem) mem:(Store {t3} (OffPtr [o3] p3) _ (Store {t4} (OffPtr [o4] p4) _ oldmem))) + // cond: isSamePtr(p1, p2) && isSamePtr(p1, p3) && isSamePtr(p1, p4) && t2.Size() == t1.(*types.Type).Size() && !overlap(o1, t2.Size(), o3, t3.(*types.Type).Size()) && !overlap(o1, t2.Size(), o4, t4.(*types.Type).Size()) + // result: mem + for { + t1 := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpOffPtr { + break + } + o1 := v_0.AuxInt + p1 := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpLoad { + break + } + t2 := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpOffPtr { + break + } + if v_1_0.AuxInt != o1 { + break + } + p2 := v_1_0.Args[0] + oldmem := v_1.Args[1] + mem := v.Args[2] + if mem.Op != OpStore { + break + } + t3 := mem.Aux + mem_0 := mem.Args[0] + if mem_0.Op != OpOffPtr { + break + } + o3 := mem_0.AuxInt + p3 := mem_0.Args[0] + mem_2 := mem.Args[2] + if mem_2.Op != OpStore { + break + } + t4 := mem_2.Aux + mem_2_0 := mem_2.Args[0] + if mem_2_0.Op != OpOffPtr { + break + } + o4 := mem_2_0.AuxInt + p4 := mem_2_0.Args[0] + if oldmem != mem_2.Args[2] { + break + } + if !(isSamePtr(p1, p2) && isSamePtr(p1, p3) && isSamePtr(p1, p4) && t2.Size() == t1.(*types.Type).Size() && !overlap(o1, t2.Size(), o3, t3.(*types.Type).Size()) && !overlap(o1, t2.Size(), o4, t4.(*types.Type).Size())) { + break + } + v.reset(OpCopy) + v.Type = mem.Type + v.AddArg(mem) + return true + } + // match: (Store {t1} (OffPtr [o1] p1) (Load (OffPtr [o1] p2) oldmem) mem:(Store {t3} (OffPtr [o3] p3) _ (Store {t4} (OffPtr [o4] p4) _ (Store {t5} (OffPtr [o5] p5) _ oldmem)))) + // cond: isSamePtr(p1, p2) && isSamePtr(p1, p3) && isSamePtr(p1, p4) && isSamePtr(p1, p5) && t2.Size() == t1.(*types.Type).Size() && !overlap(o1, t2.Size(), o3, t3.(*types.Type).Size()) && !overlap(o1, t2.Size(), o4, t4.(*types.Type).Size()) && !overlap(o1, t2.Size(), o5, t5.(*types.Type).Size()) + // result: mem + for { + t1 := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpOffPtr { + break + } + o1 := v_0.AuxInt + p1 := v_0.Args[0] + v_1 := v.Args[1] + if v_1.Op != OpLoad { + break + } + t2 := v_1.Type + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpOffPtr { + break + } + if v_1_0.AuxInt != o1 { + break + } + p2 := v_1_0.Args[0] + oldmem := v_1.Args[1] + mem := v.Args[2] + if mem.Op != OpStore { + break + } + t3 := mem.Aux + mem_0 := mem.Args[0] + if mem_0.Op != OpOffPtr { + break + } + o3 := mem_0.AuxInt + p3 := mem_0.Args[0] + mem_2 := mem.Args[2] + if mem_2.Op != OpStore { + break + } + t4 := mem_2.Aux + mem_2_0 := mem_2.Args[0] + if mem_2_0.Op != OpOffPtr { + break + } + o4 := mem_2_0.AuxInt + p4 := mem_2_0.Args[0] + mem_2_2 := mem_2.Args[2] + if mem_2_2.Op != OpStore { + break + } + t5 := mem_2_2.Aux + mem_2_2_0 := mem_2_2.Args[0] + if mem_2_2_0.Op != OpOffPtr { + break + } + o5 := mem_2_2_0.AuxInt + p5 := mem_2_2_0.Args[0] + if oldmem != mem_2_2.Args[2] { + break + } + if !(isSamePtr(p1, p2) && isSamePtr(p1, p3) && isSamePtr(p1, p4) && isSamePtr(p1, p5) && t2.Size() == t1.(*types.Type).Size() && !overlap(o1, t2.Size(), o3, t3.(*types.Type).Size()) && !overlap(o1, t2.Size(), o4, 
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = mem.Type
+		v.AddArg(mem)
+		return true
+	}
 	// match: (Store _ (StructMake0) mem)
 	// cond:
 	// result: mem
@@ -20633,6 +20829,15 @@ func rewriteValuegeneric_OpStore_0(v *Value) bool {
 		v.AddArg(mem)
 		return true
 	}
+	return false
+}
+func rewriteValuegeneric_OpStore_10(v *Value) bool {
+	b := v.Block
+	_ = b
+	config := b.Func.Config
+	_ = config
+	fe := b.Func.fe
+	_ = fe
 	// match: (Store {t} dst (Load src mem) (VarDef {x} mem))
 	// cond: !fe.CanSSA(t.(*types.Type))
 	// result: (Move {t} [t.(*types.Type).Size()] dst src (VarDef {x} mem))
@@ -20729,13 +20934,6 @@ func rewriteValuegeneric_OpStore_0(v *Value) bool {
 		v.AddArg(mem)
 		return true
 	}
-	return false
-}
-func rewriteValuegeneric_OpStore_10(v *Value) bool {
-	b := v.Block
-	_ = b
-	config := b.Func.Config
-	_ = config
 	// match: (Store (OffPtr (Load (OffPtr [c] (SP)) mem)) x mem)
 	// cond: isConstZero(x) && mem.Op == OpStaticCall && isSameSym(mem.Aux, "runtime.newobject") && c == config.ctxt.FixedFrameSize() + config.RegSize
 	// result: mem
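For illustration only (not part of the CL), the standalone sketch below shows how the overlap helper added to rewrite.go classifies the field ranges of the example struct T from the commit message. The offsets 0, 8, 16 and 24 assume 8-byte ints, as on s390x; the main wrapper and printed calls are illustrative.

package main

import "fmt"

// overlap mirrors the helper added in rewrite.go: it reports whether the
// byte ranges [offset1, offset1+size1) and [offset2, offset2+size2) intersect.
func overlap(offset1, size1, offset2, size2 int64) bool {
	if offset1 >= offset2 && offset2+size2 > offset1 {
		return true
	}
	if offset2 >= offset1 && offset1+size1 > offset2 {
		return true
	}
	return false
}

func main() {
	// In the commit message's example only x.c (offset 16, size 8) is
	// actually modified. The stores of x.a (offset 0), x.b (offset 8) and
	// x.d (offset 24) write back freshly loaded values and do not overlap
	// the intermediate store to x.c, so the new rules can remove them.
	fmt.Println(overlap(0, 8, 16, 8))  // false
	fmt.Println(overlap(8, 8, 16, 8))  // false
	fmt.Println(overlap(24, 8, 16, 8)) // false
	fmt.Println(overlap(16, 8, 16, 8)) // true: a store to x.c itself overlaps
}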