From b86cafc7dced537165a7cda61b90feae44796055 Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 10 Feb 2016 17:43:31 -0500 Subject: [PATCH] [dev.ssa] cmd/compile: memory allocation tweaks to regalloc and dom Spotted a minor source of excess allocation in the register allocator. Rearranged the dominator tree code to pull its scratch memory from a reused buffer attached to Config. Change-Id: I6da6e7b112f7d3eb1fd00c58faa8214cdea44e38 Reviewed-on: https://go-review.googlesource.com/19450 Reviewed-by: Keith Randall Run-TryBot: David Chase TryBot-Result: Gobot Gobot --- src/cmd/compile/internal/ssa/config.go | 3 +- src/cmd/compile/internal/ssa/dom.go | 76 +++++++++++++++++++----- src/cmd/compile/internal/ssa/regalloc.go | 15 ++++- 3 files changed, 74 insertions(+), 20 deletions(-) diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go index 530c480004..81061a7219 100644 --- a/src/cmd/compile/internal/ssa/config.go +++ b/src/cmd/compile/internal/ssa/config.go @@ -24,7 +24,8 @@ type Config struct { values [2000]Value blocks [200]Block - scrSparse []*sparseSet // scratch sparse sets to be re-used. + domblockstore []ID // scratch space for computing dominators + scrSparse []*sparseSet // scratch sparse sets to be re-used. } type TypeSource interface { diff --git a/src/cmd/compile/internal/ssa/dom.go b/src/cmd/compile/internal/ssa/dom.go index 50ff472ca3..2d53b5a957 100644 --- a/src/cmd/compile/internal/ssa/dom.go +++ b/src/cmd/compile/internal/ssa/dom.go @@ -54,17 +54,53 @@ func postorder(f *Func) []*Block { type linkedBlocks func(*Block) []*Block +const nscratchslices = 8 + +// experimentally, functions with 512 or fewer blocks account +// for 75% of memory (size) allocation for dominator computation +// in make.bash. +const minscratchblocks = 512 + +func (cfg *Config) scratchBlocksForDom(maxBlockID int) (a, b, c, d, e, f, g, h []ID) { + tot := maxBlockID * nscratchslices + scratch := cfg.domblockstore + if len(scratch) < tot { + // req = min(1.5*tot, nscratchslices*minscratchblocks) + // 50% padding allows for graph growth in later phases. + req := (tot * 3) >> 1 + if req < nscratchslices*minscratchblocks { + req = nscratchslices * minscratchblocks + } + scratch = make([]ID, req) + cfg.domblockstore = scratch + } else { + // Clear as much of scratch as we will (re)use + scratch = scratch[0:tot] + for i := range scratch { + scratch[i] = 0 + } + } + + a = scratch[0*maxBlockID : 1*maxBlockID] + b = scratch[1*maxBlockID : 2*maxBlockID] + c = scratch[2*maxBlockID : 3*maxBlockID] + d = scratch[3*maxBlockID : 4*maxBlockID] + e = scratch[4*maxBlockID : 5*maxBlockID] + f = scratch[5*maxBlockID : 6*maxBlockID] + g = scratch[6*maxBlockID : 7*maxBlockID] + h = scratch[7*maxBlockID : 8*maxBlockID] + + return +} + // dfs performs a depth first search over the blocks starting at the set of // blocks in the entries list (in arbitrary order). dfnum contains a mapping // from block id to an int indicating the order the block was reached or // notFound if the block was not reached. order contains a mapping from dfnum // to block. -func dfs(entries []*Block, succFn linkedBlocks) (fromID []*Block, dfnum []int32, order []ID, parent []ID) { +func (f *Func) dfs(entries []*Block, succFn linkedBlocks, dfnum, order, parent []ID) (fromID []*Block) { maxBlockID := entries[0].Func.NumBlocks() - dfnum = make([]int32, maxBlockID) - order = make([]ID, maxBlockID) - parent = make([]ID, maxBlockID) fromID = make([]*Block, maxBlockID) for _, entry := range entries[0].Func.Blocks { @@ -75,7 +111,7 @@ func dfs(entries []*Block, succFn linkedBlocks) (fromID []*Block, dfnum []int32, fromID[eid] = entry } - n := int32(0) + n := ID(0) s := make([]*Block, 0, 256) for _, entry := range entries { if dfnum[entry.ID] != notFound { @@ -113,7 +149,7 @@ func dominators(f *Func) []*Block { //TODO: benchmark and try to find criteria for swapping between // dominatorsSimple and dominatorsLT - return dominatorsLT([]*Block{f.Entry}, preds, succs) + return f.dominatorsLT([]*Block{f.Entry}, preds, succs) } // postDominators computes the post-dominator tree for f. @@ -139,27 +175,35 @@ func postDominators(f *Func) []*Block { if exits == nil { return make([]*Block, f.NumBlocks()) } - return dominatorsLT(exits, succs, preds) + return f.dominatorsLT(exits, succs, preds) } // dominatorsLt runs Lengauer-Tarjan to compute a dominator tree starting at // entry and using predFn/succFn to find predecessors/successors to allow // computing both dominator and post-dominator trees. -func dominatorsLT(entries []*Block, predFn linkedBlocks, succFn linkedBlocks) []*Block { +func (f *Func) dominatorsLT(entries []*Block, predFn linkedBlocks, succFn linkedBlocks) []*Block { // Based on Lengauer-Tarjan from Modern Compiler Implementation in C - // Appel with optimizations from Finding Dominators in Practice - // Georgiadis + maxBlockID := entries[0].Func.NumBlocks() + + dfnum, vertex, parent, semi, samedom, ancestor, best, bucket := f.Config.scratchBlocksForDom(maxBlockID) + + // dfnum := make([]ID, maxBlockID) // conceptually int32, but punning for allocation purposes. + // vertex := make([]ID, maxBlockID) + // parent := make([]ID, maxBlockID) + + // semi := make([]ID, maxBlockID) + // samedom := make([]ID, maxBlockID) + // ancestor := make([]ID, maxBlockID) + // best := make([]ID, maxBlockID) + // bucket := make([]ID, maxBlockID) + // Step 1. Carry out a depth first search of the problem graph. Number // the vertices from 1 to n as they are reached during the search. - fromID, dfnum, vertex, parent := dfs(entries, succFn) + fromID := f.dfs(entries, succFn, dfnum, vertex, parent) - maxBlockID := entries[0].Func.NumBlocks() - semi := make([]ID, maxBlockID) - samedom := make([]ID, maxBlockID) - ancestor := make([]ID, maxBlockID) - best := make([]ID, maxBlockID) - bucket := make([]ID, maxBlockID) idom := make([]*Block, maxBlockID) // Step 2. Compute the semidominators of all vertices by applying @@ -242,7 +286,7 @@ func dominatorsLT(entries []*Block, predFn linkedBlocks, succFn linkedBlocks) [] } // eval function from LT paper with path compression -func eval(v ID, ancestor []ID, semi []ID, dfnum []int32, best []ID) ID { +func eval(v ID, ancestor []ID, semi []ID, dfnum []ID, best []ID) ID { a := ancestor[v] if ancestor[a] != 0 { bid := eval(a, ancestor, semi, dfnum, best) diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index bfb6f7da76..a55f81d4ac 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -964,7 +964,16 @@ func (s *regAllocState) regalloc(f *Func) { } // Save end-of-block register state. - var regList []endReg + // First count how many, this cuts allocations in half. + k := 0 + for r := register(0); r < numRegs; r++ { + v := s.regs[r].v + if v == nil { + continue + } + k++ + } + regList := make([]endReg, 0, k) for r := register(0); r < numRegs; r++ { v := s.regs[r].v if v == nil { @@ -1609,8 +1618,8 @@ func (s *regAllocState) computeLive() { } // The live set has changed, update it. l := s.live[p.ID][:0] - if cap(l) == 0 { - l = make([]liveInfo, 0, len(t.contents())) + if cap(l) < t.size() { + l = make([]liveInfo, 0, t.size()) } for _, e := range t.contents() { l = append(l, liveInfo{e.key, e.val}) -- 2.48.1