These passes do not modify the dominator tree too much.
% benchstat old.txt new.txt
name old time/op new time/op delta
Template 335ms ± 3% 325ms ± 8% ~ (p=0.074 n=8+9)
GoTypes 1.05s ± 1% 1.05s ± 3% ~ (p=0.095 n=9+10)
Compiler 5.37s ± 4% 5.29s ± 1% -1.42% (p=0.022 n=9+10)
MakeBash 34.9s ± 3% 34.4s ± 2% ~ (p=0.095 n=9+10)
name old alloc/op new alloc/op delta
Template 55.4MB ± 0% 54.9MB ± 0% -0.81% (p=0.000 n=10+10)
GoTypes 179MB ± 0% 178MB ± 0% -0.89% (p=0.000 n=10+10)
Compiler 807MB ± 0% 798MB ± 0% -1.10% (p=0.000 n=10+10)
name old allocs/op new allocs/op delta
Template 498k ± 0% 496k ± 0% -0.29% (p=0.000 n=9+9)
GoTypes 1.42M ± 0% 1.41M ± 0% -0.24% (p=0.000 n=10+10)
Compiler 5.61M ± 0% 5.60M ± 0% -0.12% (p=0.000 n=10+10)
Change-Id: I4cd20cfba3f132ebf371e16046ab14d7e42799ec
Reviewed-on: https://go-review.googlesource.com/21806
Run-TryBot: Alexandru Moșoi <alexandru@mosoi.ro>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
{name: "zero arg cse", fn: zcse, required: true}, // required to merge OpSB values
{name: "opt deadcode", fn: deadcode, required: true}, // remove any blocks orphaned during opt
{name: "generic cse", fn: cse},
+ {name: "generic domtree", fn: domTree},
{name: "phiopt", fn: phiopt},
{name: "nilcheckelim", fn: nilcheckelim},
{name: "prove", fn: prove},
{"opt", "nilcheckelim"},
// tighten should happen before lowering to avoid splitting naturally paired instructions such as CMP/SET
{"tighten", "lower"},
+ // nilcheckelim, prove and loopbce share the idom and sdom computed by generic domtree.
+ {"generic domtree", "nilcheckelim"},
+ {"generic domtree", "prove"},
+ {"generic domtree", "loopbce"},
// tighten will be most effective when as many values have been removed as possible
{"generic deadcode", "tighten"},
{"generic cse", "tighten"},
}
return b
}
+
+// domTree computes the immediate dominators and the sparse dominator tree of f, caching them in f.idom and f.sdom.
+func domTree(f *Func) {
+ f.idom = dominators(f)
+ f.sdom = newSparseTree(f, f.idom)
+}
freeValues *Value // free Values linked by argstorage[0]. All other fields except ID are 0/nil.
freeBlocks *Block // free Blocks linked by succstorage[0]. All other fields except ID are 0/nil.
+ idom []*Block // precomputed immediate dominators
+ sdom sparseTree // precomputed dominator tree
+
constants map[int64][]*Value // constants cache, keyed by constant value; users must check value's Op and Type
}
//
//
// TODO: handle 32 bit operations
-func findIndVar(f *Func, sdom sparseTree) []indVar {
+func findIndVar(f *Func) []indVar {
var iv []indVar
nextb:
// Second condition: b.Succs[entry] dominates nxt so that
// nxt is computed when inc < max, meaning nxt <= max.
- if !sdom.isAncestorEq(b.Succs[entry], nxt.Block) {
+ if !f.sdom.isAncestorEq(b.Succs[entry], nxt.Block) {
// inc+ind can only be reached through the branch that enters the loop.
continue
}
// loopbce performs loop-based bounds check elimination.
func loopbce(f *Func) {
- idom := dominators(f)
- sdom := newSparseTree(f, idom)
- ivList := findIndVar(f, sdom)
+ ivList := findIndVar(f)
m := make(map[*Value]indVar)
for _, iv := range ivList {
m[iv.ind] = iv
}
- removeBoundsChecks(f, sdom, m)
+ removeBoundsChecks(f, m)
}
// removeBoundsChecks removes IsInBounds and IsSliceInBounds checks based on the induction variables.
-func removeBoundsChecks(f *Func, sdom sparseTree, m map[*Value]indVar) {
+func removeBoundsChecks(f *Func, m map[*Value]indVar) {
for _, b := range f.Blocks {
if b.Kind != BlockIf {
continue
goto skip1
}
- if iv, has := m[ind]; has && sdom.isAncestorEq(iv.entry, b) && isNonNegative(iv.min) {
+ if iv, has := m[ind]; has && f.sdom.isAncestorEq(iv.entry, b) && isNonNegative(iv.min) {
if v.Args[1] == iv.max {
if f.pass.debug > 0 {
f.Config.Warnl(b.Line, "Found redundant %s", v.Op)
goto skip2
}
- if iv, has := m[ind]; has && sdom.isAncestorEq(iv.entry, b) && isNonNegative(iv.min) {
+ if iv, has := m[ind]; has && f.sdom.isAncestorEq(iv.entry, b) && isNonNegative(iv.min) {
if v.Args[1].Op == OpSliceCap && iv.max.Op == OpSliceLen && v.Args[1].Args[0] == iv.max.Args[0] {
if f.pass.debug > 0 {
f.Config.Warnl(b.Line, "Found redundant %s (len promoted to cap)", v.Op)
}
// ind + add >= 0 <-> min + add >= 0 <-> min >= -add
- if iv, has := m[ind]; has && sdom.isAncestorEq(iv.entry, b) && isGreaterOrEqualThan(iv.min, -add) {
+ if iv, has := m[ind]; has && f.sdom.isAncestorEq(iv.entry, b) && isGreaterOrEqualThan(iv.min, -add) {
if !v.Args[1].isGenericIntConst() || !iv.max.isGenericIntConst() {
goto skip3
}
// A nil check is redundant if the same nil check was successful in a
// dominating block. The efficacy of this pass depends heavily on the
// efficacy of the cse pass.
- idom := dominators(f)
+ idom := f.idom
domTree := make([][]*Block, f.NumBlocks())
// Create a block ID -> [dominees] mapping
b.ReportAllocs()
for i := 0; i < b.N; i++ {
+ domTree(fun.f)
nilcheckelim(fun.f)
}
}
Exit("mem")))
CheckFunc(fun.f)
+ domTree(fun.f)
nilcheckelim(fun.f)
// clean up the removed nil check
Goto("exit")))
CheckFunc(fun.f)
+ domTree(fun.f)
nilcheckelim(fun.f)
// clean up the removed nil check
Exit("mem")))
CheckFunc(fun.f)
+ domTree(fun.f)
nilcheckelim(fun.f)
// clean up the removed nil check
Exit("mem")))
CheckFunc(fun.f)
+ domTree(fun.f)
nilcheckelim(fun.f)
// clean up the removed nil check
Exit("mem")))
CheckFunc(fun.f)
+ domTree(fun.f)
nilcheckelim(fun.f)
// clean up the removed nil check
Exit("mem")))
CheckFunc(fun.f)
+ domTree(fun.f)
nilcheckelim(fun.f)
// clean up the removed nil check
Exit("mem")))
CheckFunc(fun.f)
+ domTree(fun.f)
nilcheckelim(fun.f)
// clean up the removed nil check
CheckFunc(fun.f)
// we need the opt here to rewrite the user nilcheck
opt(fun.f)
+ domTree(fun.f)
nilcheckelim(fun.f)
// clean up the removed nil check
CheckFunc(fun.f)
// we need the opt here to rewrite the user nilcheck
opt(fun.f)
+ domTree(fun.f)
nilcheckelim(fun.f)
// clean up the removed nil check
// else branch of the first comparison is executed, we already know that i < len(a).
// The code for the second panic can be removed.
func prove(f *Func) {
- idom := dominators(f)
- sdom := newSparseTree(f, idom)
-
// current node state
type walkState int
const (
for len(work) > 0 {
node := work[len(work)-1]
work = work[:len(work)-1]
- parent := idom[node.block.ID]
- branch := getBranch(sdom, parent, node.block)
+ parent := f.idom[node.block.ID]
+ branch := getBranch(f.sdom, parent, node.block)
switch node.state {
case descend:
block: node.block,
state: simplify,
})
- for s := sdom.Child(node.block); s != nil; s = sdom.Sibling(s) {
+ for s := f.sdom.Child(node.block); s != nil; s = f.sdom.Sibling(s) {
work = append(work, bp{
block: s,
state: descend,