From b86cafc7dced537165a7cda61b90feae44796055 Mon Sep 17 00:00:00 2001
From: David Chase <drchase@google.com>
Date: Wed, 10 Feb 2016 17:43:31 -0500
Subject: [PATCH] [dev.ssa] cmd/compile: memory allocation tweaks to regalloc
 and dom

Spotted a minor source of excess allocation in the register
allocator.  Rearranged the dominator tree code to pull its
scratch memory from a reused buffer attached to Config.

Change-Id: I6da6e7b112f7d3eb1fd00c58faa8214cdea44e38
Reviewed-on: https://go-review.googlesource.com/19450
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
---
 src/cmd/compile/internal/ssa/config.go   |  3 +-
 src/cmd/compile/internal/ssa/dom.go      | 76 +++++++++++++++++++-----
 src/cmd/compile/internal/ssa/regalloc.go | 15 ++++-
 3 files changed, 74 insertions(+), 20 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go
index 530c480004..81061a7219 100644
--- a/src/cmd/compile/internal/ssa/config.go
+++ b/src/cmd/compile/internal/ssa/config.go
@@ -24,7 +24,8 @@ type Config struct {
 	values [2000]Value
 	blocks [200]Block
 
-	scrSparse []*sparseSet // scratch sparse sets to be re-used.
+	domblockstore []ID         // scratch space for computing dominators
+	scrSparse     []*sparseSet // scratch sparse sets to be re-used.
 }
 
 type TypeSource interface {
diff --git a/src/cmd/compile/internal/ssa/dom.go b/src/cmd/compile/internal/ssa/dom.go
index 50ff472ca3..2d53b5a957 100644
--- a/src/cmd/compile/internal/ssa/dom.go
+++ b/src/cmd/compile/internal/ssa/dom.go
@@ -54,17 +54,53 @@ func postorder(f *Func) []*Block {
 
 type linkedBlocks func(*Block) []*Block
 
+const nscratchslices = 8
+
+// experimentally, functions with 512 or fewer blocks account
+// for 75% of memory (size) allocation for dominator computation
+// in make.bash.
+const minscratchblocks = 512
+
+func (cfg *Config) scratchBlocksForDom(maxBlockID int) (a, b, c, d, e, f, g, h []ID) {
+	tot := maxBlockID * nscratchslices
+	scratch := cfg.domblockstore
+	if len(scratch) < tot {
+		// req = min(1.5*tot, nscratchslices*minscratchblocks)
+		// 50% padding allows for graph growth in later phases.
+		req := (tot * 3) >> 1
+		if req < nscratchslices*minscratchblocks {
+			req = nscratchslices * minscratchblocks
+		}
+		scratch = make([]ID, req)
+		cfg.domblockstore = scratch
+	} else {
+		// Clear as much of scratch as we will (re)use
+		scratch = scratch[0:tot]
+		for i := range scratch {
+			scratch[i] = 0
+		}
+	}
+
+	a = scratch[0*maxBlockID : 1*maxBlockID]
+	b = scratch[1*maxBlockID : 2*maxBlockID]
+	c = scratch[2*maxBlockID : 3*maxBlockID]
+	d = scratch[3*maxBlockID : 4*maxBlockID]
+	e = scratch[4*maxBlockID : 5*maxBlockID]
+	f = scratch[5*maxBlockID : 6*maxBlockID]
+	g = scratch[6*maxBlockID : 7*maxBlockID]
+	h = scratch[7*maxBlockID : 8*maxBlockID]
+
+	return
+}
+
 // dfs performs a depth first search over the blocks starting at the set of
 // blocks in the entries list (in arbitrary order). dfnum contains a mapping
 // from block id to an int indicating the order the block was reached or
 // notFound if the block was not reached.  order contains a mapping from dfnum
 // to block.
-func dfs(entries []*Block, succFn linkedBlocks) (fromID []*Block, dfnum []int32, order []ID, parent []ID) {
+func (f *Func) dfs(entries []*Block, succFn linkedBlocks, dfnum, order, parent []ID) (fromID []*Block) {
 	maxBlockID := entries[0].Func.NumBlocks()
 
-	dfnum = make([]int32, maxBlockID)
-	order = make([]ID, maxBlockID)
-	parent = make([]ID, maxBlockID)
 	fromID = make([]*Block, maxBlockID)
 
 	for _, entry := range entries[0].Func.Blocks {
@@ -75,7 +111,7 @@ func dfs(entries []*Block, succFn linkedBlocks) (fromID []*Block, dfnum []int32,
 		fromID[eid] = entry
 	}
 
-	n := int32(0)
+	n := ID(0)
 	s := make([]*Block, 0, 256)
 	for _, entry := range entries {
 		if dfnum[entry.ID] != notFound {
@@ -113,7 +149,7 @@ func dominators(f *Func) []*Block {
 
 	//TODO: benchmark and try to find criteria for swapping between
 	// dominatorsSimple and dominatorsLT
-	return dominatorsLT([]*Block{f.Entry}, preds, succs)
+	return f.dominatorsLT([]*Block{f.Entry}, preds, succs)
 }
 
 // postDominators computes the post-dominator tree for f.
@@ -139,27 +175,35 @@ func postDominators(f *Func) []*Block {
 	if exits == nil {
 		return make([]*Block, f.NumBlocks())
 	}
-	return dominatorsLT(exits, succs, preds)
+	return f.dominatorsLT(exits, succs, preds)
 }
 
 // dominatorsLt runs Lengauer-Tarjan to compute a dominator tree starting at
 // entry and using predFn/succFn to find predecessors/successors to allow
 // computing both dominator and post-dominator trees.
-func dominatorsLT(entries []*Block, predFn linkedBlocks, succFn linkedBlocks) []*Block {
+func (f *Func) dominatorsLT(entries []*Block, predFn linkedBlocks, succFn linkedBlocks) []*Block {
 	// Based on Lengauer-Tarjan from Modern Compiler Implementation in C -
 	// Appel with optimizations from Finding Dominators in Practice -
 	// Georgiadis
 
+	maxBlockID := entries[0].Func.NumBlocks()
+
+	dfnum, vertex, parent, semi, samedom, ancestor, best, bucket := f.Config.scratchBlocksForDom(maxBlockID)
+
+	// dfnum := make([]ID, maxBlockID) // conceptually int32, but punning for allocation purposes.
+	// vertex := make([]ID, maxBlockID)
+	// parent := make([]ID, maxBlockID)
+
+	// semi := make([]ID, maxBlockID)
+	// samedom := make([]ID, maxBlockID)
+	// ancestor := make([]ID, maxBlockID)
+	// best := make([]ID, maxBlockID)
+	// bucket := make([]ID, maxBlockID)
+
 	// Step 1. Carry out a depth first search of the problem graph. Number
 	// the vertices from 1 to n as they are reached during the search.
-	fromID, dfnum, vertex, parent := dfs(entries, succFn)
+	fromID := f.dfs(entries, succFn, dfnum, vertex, parent)
 
-	maxBlockID := entries[0].Func.NumBlocks()
-	semi := make([]ID, maxBlockID)
-	samedom := make([]ID, maxBlockID)
-	ancestor := make([]ID, maxBlockID)
-	best := make([]ID, maxBlockID)
-	bucket := make([]ID, maxBlockID)
 	idom := make([]*Block, maxBlockID)
 
 	// Step 2. Compute the semidominators of all vertices by applying
@@ -242,7 +286,7 @@ func dominatorsLT(entries []*Block, predFn linkedBlocks, succFn linkedBlocks) []
 }
 
 // eval function from LT paper with path compression
-func eval(v ID, ancestor []ID, semi []ID, dfnum []int32, best []ID) ID {
+func eval(v ID, ancestor []ID, semi []ID, dfnum []ID, best []ID) ID {
 	a := ancestor[v]
 	if ancestor[a] != 0 {
 		bid := eval(a, ancestor, semi, dfnum, best)
diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go
index bfb6f7da76..a55f81d4ac 100644
--- a/src/cmd/compile/internal/ssa/regalloc.go
+++ b/src/cmd/compile/internal/ssa/regalloc.go
@@ -964,7 +964,16 @@ func (s *regAllocState) regalloc(f *Func) {
 		}
 
 		// Save end-of-block register state.
-		var regList []endReg
+		// First count how many, this cuts allocations in half.
+		k := 0
+		for r := register(0); r < numRegs; r++ {
+			v := s.regs[r].v
+			if v == nil {
+				continue
+			}
+			k++
+		}
+		regList := make([]endReg, 0, k)
 		for r := register(0); r < numRegs; r++ {
 			v := s.regs[r].v
 			if v == nil {
@@ -1609,8 +1618,8 @@ func (s *regAllocState) computeLive() {
 				}
 				// The live set has changed, update it.
 				l := s.live[p.ID][:0]
-				if cap(l) == 0 {
-					l = make([]liveInfo, 0, len(t.contents()))
+				if cap(l) < t.size() {
+					l = make([]liveInfo, 0, t.size())
 				}
 				for _, e := range t.contents() {
 					l = append(l, liveInfo{e.key, e.val})
-- 
2.50.0