From 68bd383368b5958f8f02c49bc75134a0ef61daec Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Tue, 18 Oct 2022 16:07:36 -0700 Subject: [PATCH] cmd/compile: add cache of sizeable objects so they can be reused We kind of have this mechanism already, just normalizing it and using it in a bunch of places. Previously a bunch of places cached slices only for the duration of a single function compilation. Now we can reuse slices across a whole compiler run. Use a sync.Pool of powers-of-two sizes. This lets us use not too much memory, and avoid holding onto memory we're no longer using when a GC happens. There's a few different types we need, so generate the code for it. Generics would be useful here, but we can't use generics in the compiler because of bootstrapping. Change-Id: I6cf37e7b7b2e802882aaa723a0b29770511ccd82 Reviewed-on: https://go-review.googlesource.com/c/go/+/444820 Run-TryBot: Keith Randall Reviewed-by: Heschi Kreinick TryBot-Result: Gopher Robot Reviewed-by: David Chase --- .../compile/internal/ssa/_gen/allocators.go | 198 ++++++++++ src/cmd/compile/internal/ssa/_gen/main.go | 1 + src/cmd/compile/internal/ssa/allocators.go | 343 ++++++++++++++++++ src/cmd/compile/internal/ssa/cache.go | 39 +- src/cmd/compile/internal/ssa/critical.go | 3 +- src/cmd/compile/internal/ssa/cse.go | 10 +- src/cmd/compile/internal/ssa/deadcode.go | 29 +- src/cmd/compile/internal/ssa/dom.go | 57 +-- src/cmd/compile/internal/ssa/flagalloc.go | 3 +- src/cmd/compile/internal/ssa/func.go | 78 +--- src/cmd/compile/internal/ssa/layout.go | 9 +- src/cmd/compile/internal/ssa/likelyadjust.go | 12 +- .../compile/internal/ssa/loopreschedchecks.go | 6 +- src/cmd/compile/internal/ssa/looprotate.go | 16 +- src/cmd/compile/internal/ssa/nilcheck.go | 6 +- src/cmd/compile/internal/ssa/print.go | 2 +- src/cmd/compile/internal/ssa/regalloc.go | 13 +- src/cmd/compile/internal/ssa/schedule.go | 9 +- src/cmd/compile/internal/ssa/stackalloc.go | 26 +- src/cmd/compile/internal/ssa/tighten.go | 6 +- src/cmd/compile/internal/ssa/writebarrier.go | 3 +- 21 files changed, 656 insertions(+), 213 deletions(-) create mode 100644 src/cmd/compile/internal/ssa/_gen/allocators.go create mode 100644 src/cmd/compile/internal/ssa/allocators.go diff --git a/src/cmd/compile/internal/ssa/_gen/allocators.go b/src/cmd/compile/internal/ssa/_gen/allocators.go new file mode 100644 index 0000000000..0f3968c485 --- /dev/null +++ b/src/cmd/compile/internal/ssa/_gen/allocators.go @@ -0,0 +1,198 @@ +package main + +// TODO: should we share backing storage for similarly-shaped types? +// e.g. []*Value and []*Block, or even []int32 and []bool. + +import ( + "bytes" + "fmt" + "go/format" + "io" + "log" + "os" +) + +type allocator struct { + name string // name for alloc/free functions + typ string // the type they return/accept + mak string // code to make a new object (takes power-of-2 size as fmt arg) + capacity string // code to calculate the capacity of an object. Should always report a power of 2. 
+ resize string // code to shrink to sub-power-of-two size (takes size as fmt arg) + clear string // code for clearing object before putting it on the free list + minLog int // log_2 of minimum allocation size + maxLog int // log_2 of maximum allocation size +} + +func genAllocators() { + allocators := []allocator{ + { + name: "ValueSlice", + typ: "[]*Value", + capacity: "cap(%s)", + mak: "make([]*Value, %s)", + resize: "%s[:%s]", + clear: "for i := range %[1]s {\n%[1]s[i] = nil\n}", + minLog: 5, + maxLog: 32, + }, + { + name: "BlockSlice", + typ: "[]*Block", + capacity: "cap(%s)", + mak: "make([]*Block, %s)", + resize: "%s[:%s]", + clear: "for i := range %[1]s {\n%[1]s[i] = nil\n}", + minLog: 5, + maxLog: 32, + }, + { + name: "BoolSlice", + typ: "[]bool", + capacity: "cap(%s)", + mak: "make([]bool, %s)", + resize: "%s[:%s]", + clear: "for i := range %[1]s {\n%[1]s[i] = false\n}", + minLog: 8, + maxLog: 32, + }, + { + name: "IntSlice", + typ: "[]int", + capacity: "cap(%s)", + mak: "make([]int, %s)", + resize: "%s[:%s]", + clear: "for i := range %[1]s {\n%[1]s[i] = 0\n}", + minLog: 5, + maxLog: 32, + }, + { + name: "Int32Slice", + typ: "[]int32", + capacity: "cap(%s)", + mak: "make([]int32, %s)", + resize: "%s[:%s]", + clear: "for i := range %[1]s {\n%[1]s[i] = 0\n}", + minLog: 6, + maxLog: 32, + }, + { + name: "Int8Slice", + typ: "[]int8", + capacity: "cap(%s)", + mak: "make([]int8, %s)", + resize: "%s[:%s]", + clear: "for i := range %[1]s {\n%[1]s[i] = 0\n}", + minLog: 8, + maxLog: 32, + }, + { + name: "IDSlice", + typ: "[]ID", + capacity: "cap(%s)", + mak: "make([]ID, %s)", + resize: "%s[:%s]", + clear: "for i := range %[1]s {\n%[1]s[i] = 0\n}", + minLog: 6, + maxLog: 32, + }, + { + name: "SparseSet", + typ: "*sparseSet", + capacity: "%s.cap()", + mak: "newSparseSet(%s)", + resize: "", // larger-sized sparse sets are ok + clear: "%s.clear()", + minLog: 5, + maxLog: 32, + }, + { + name: "SparseMap", + typ: "*sparseMap", + capacity: "%s.cap()", + mak: "newSparseMap(%s)", + resize: "", // larger-sized sparse maps are ok + clear: "%s.clear()", + minLog: 5, + maxLog: 32, + }, + { + name: "SparseMapPos", + typ: "*sparseMapPos", + capacity: "%s.cap()", + mak: "newSparseMapPos(%s)", + resize: "", // larger-sized sparse maps are ok + clear: "%s.clear()", + minLog: 5, + maxLog: 32, + }, + } + + w := new(bytes.Buffer) + fmt.Fprintf(w, "// Code generated from _gen/allocators.go; DO NOT EDIT.\n") + fmt.Fprintln(w) + fmt.Fprintln(w, "package ssa") + + fmt.Fprintln(w, "import (") + fmt.Fprintln(w, "\"math/bits\"") + fmt.Fprintln(w, "\"sync\"") + fmt.Fprintln(w, ")") + for _, a := range allocators { + genAllocator(w, a) + } + // gofmt result + b := w.Bytes() + var err error + b, err = format.Source(b) + if err != nil { + fmt.Printf("%s\n", w.Bytes()) + panic(err) + } + + if err := os.WriteFile("../allocators.go", b, 0666); err != nil { + log.Fatalf("can't write output: %v\n", err) + } +} +func genAllocator(w io.Writer, a allocator) { + fmt.Fprintf(w, "var poolFree%s [%d]sync.Pool\n", a.name, a.maxLog-a.minLog) + fmt.Fprintf(w, "func (c *Cache) alloc%s(n int) %s {\n", a.name, a.typ) + fmt.Fprintf(w, "var s %s\n", a.typ) + fmt.Fprintf(w, "n2 := n\n") + fmt.Fprintf(w, "if n2 < %d { n2 = %d }\n", 1< 0 { // pop a reachable value v := q[len(q)-1] + q[len(q)-1] = nil q = q[:len(q)-1] for i, x := range v.Args { if v.Op == OpPhi && !reachable[v.Block.Preds[i].b.ID] { @@ -213,8 +204,8 @@ func deadcode(f *Func) { // Find live values. 
live, order := liveValues(f, reachable) - defer f.retDeadcodeLive(live) - defer f.retDeadcodeLiveOrderStmts(order) + defer func() { f.Cache.freeBoolSlice(live) }() + defer func() { f.Cache.freeValueSlice(order) }() // Remove dead & duplicate entries from namedValues map. s := f.newSparseSet(f.NumValues()) diff --git a/src/cmd/compile/internal/ssa/dom.go b/src/cmd/compile/internal/ssa/dom.go index f31e7df724..dd40b2ae81 100644 --- a/src/cmd/compile/internal/ssa/dom.go +++ b/src/cmd/compile/internal/ssa/dom.go @@ -21,7 +21,8 @@ type blockAndIndex struct { // postorderWithNumbering provides a DFS postordering. // This seems to make loop-finding more robust. func postorderWithNumbering(f *Func, ponums []int32) []*Block { - seen := make([]bool, f.NumBlocks()) + seen := f.Cache.allocBoolSlice(f.NumBlocks()) + defer f.Cache.freeBoolSlice(seen) // result ordering order := make([]*Block, 0, len(f.Blocks)) @@ -56,44 +57,6 @@ func postorderWithNumbering(f *Func, ponums []int32) []*Block { type linkedBlocks func(*Block) []Edge -const nscratchslices = 7 - -// experimentally, functions with 512 or fewer blocks account -// for 75% of memory (size) allocation for dominator computation -// in make.bash. -const minscratchblocks = 512 - -func (cache *Cache) scratchBlocksForDom(maxBlockID int) (a, b, c, d, e, f, g []ID) { - tot := maxBlockID * nscratchslices - scratch := cache.domblockstore - if len(scratch) < tot { - // req = min(1.5*tot, nscratchslices*minscratchblocks) - // 50% padding allows for graph growth in later phases. - req := (tot * 3) >> 1 - if req < nscratchslices*minscratchblocks { - req = nscratchslices * minscratchblocks - } - scratch = make([]ID, req) - cache.domblockstore = scratch - } else { - // Clear as much of scratch as we will (re)use - scratch = scratch[0:tot] - for i := range scratch { - scratch[i] = 0 - } - } - - a = scratch[0*maxBlockID : 1*maxBlockID] - b = scratch[1*maxBlockID : 2*maxBlockID] - c = scratch[2*maxBlockID : 3*maxBlockID] - d = scratch[3*maxBlockID : 4*maxBlockID] - e = scratch[4*maxBlockID : 5*maxBlockID] - f = scratch[5*maxBlockID : 6*maxBlockID] - g = scratch[6*maxBlockID : 7*maxBlockID] - - return -} - func dominators(f *Func) []*Block { preds := func(b *Block) []Edge { return b.Preds } succs := func(b *Block) []Edge { return b.Succs } @@ -110,12 +73,21 @@ func (f *Func) dominatorsLTOrig(entry *Block, predFn linkedBlocks, succFn linked // Adapted directly from the original TOPLAS article's "simple" algorithm maxBlockID := entry.Func.NumBlocks() - semi, vertex, label, parent, ancestor, bucketHead, bucketLink := f.Cache.scratchBlocksForDom(maxBlockID) + scratch := f.Cache.allocIDSlice(7 * maxBlockID) + defer f.Cache.freeIDSlice(scratch) + semi := scratch[0*maxBlockID : 1*maxBlockID] + vertex := scratch[1*maxBlockID : 2*maxBlockID] + label := scratch[2*maxBlockID : 3*maxBlockID] + parent := scratch[3*maxBlockID : 4*maxBlockID] + ancestor := scratch[4*maxBlockID : 5*maxBlockID] + bucketHead := scratch[5*maxBlockID : 6*maxBlockID] + bucketLink := scratch[6*maxBlockID : 7*maxBlockID] // This version uses integers for most of the computation, // to make the work arrays smaller and pointer-free. // fromID translates from ID to *Block where that is needed. 
- fromID := make([]*Block, maxBlockID) + fromID := f.Cache.allocBlockSlice(maxBlockID) + defer f.Cache.freeBlockSlice(fromID) for _, v := range f.Blocks { fromID[v.ID] = v } @@ -243,7 +215,8 @@ func dominatorsSimple(f *Func) []*Block { post := f.postorder() // Make map from block id to order index (for intersect call) - postnum := make([]int, f.NumBlocks()) + postnum := f.Cache.allocIntSlice(f.NumBlocks()) + defer f.Cache.freeIntSlice(postnum) for i, b := range post { postnum[b.ID] = i } diff --git a/src/cmd/compile/internal/ssa/flagalloc.go b/src/cmd/compile/internal/ssa/flagalloc.go index 61c45a6be7..cf2c9a0023 100644 --- a/src/cmd/compile/internal/ssa/flagalloc.go +++ b/src/cmd/compile/internal/ssa/flagalloc.go @@ -11,7 +11,8 @@ func flagalloc(f *Func) { // Compute the in-register flag value we want at the end of // each block. This is basically a best-effort live variable // analysis, so it can be much simpler than a full analysis. - end := make([]*Value, f.NumBlocks()) + end := f.Cache.allocValueSlice(f.NumBlocks()) + defer f.Cache.freeValueSlice(end) po := f.postorder() for n := 0; n < 2; n++ { for _, b := range po { diff --git a/src/cmd/compile/internal/ssa/func.go b/src/cmd/compile/internal/ssa/func.go index a1330a539e..ec811af7c9 100644 --- a/src/cmd/compile/internal/ssa/func.go +++ b/src/cmd/compile/internal/ssa/func.go @@ -105,74 +105,35 @@ func (f *Func) NumValues() int { // newSparseSet returns a sparse set that can store at least up to n integers. func (f *Func) newSparseSet(n int) *sparseSet { - for i, scr := range f.Cache.scrSparseSet { - if scr != nil && scr.cap() >= n { - f.Cache.scrSparseSet[i] = nil - scr.clear() - return scr - } - } - return newSparseSet(n) + return f.Cache.allocSparseSet(n) } // retSparseSet returns a sparse set to the config's cache of sparse // sets to be reused by f.newSparseSet. func (f *Func) retSparseSet(ss *sparseSet) { - for i, scr := range f.Cache.scrSparseSet { - if scr == nil { - f.Cache.scrSparseSet[i] = ss - return - } - } - f.Cache.scrSparseSet = append(f.Cache.scrSparseSet, ss) + f.Cache.freeSparseSet(ss) } // newSparseMap returns a sparse map that can store at least up to n integers. func (f *Func) newSparseMap(n int) *sparseMap { - for i, scr := range f.Cache.scrSparseMap { - if scr != nil && scr.cap() >= n { - f.Cache.scrSparseMap[i] = nil - scr.clear() - return scr - } - } - return newSparseMap(n) + return f.Cache.allocSparseMap(n) } // retSparseMap returns a sparse map to the config's cache of sparse // sets to be reused by f.newSparseMap. func (f *Func) retSparseMap(ss *sparseMap) { - for i, scr := range f.Cache.scrSparseMap { - if scr == nil { - f.Cache.scrSparseMap[i] = ss - return - } - } - f.Cache.scrSparseMap = append(f.Cache.scrSparseMap, ss) + f.Cache.freeSparseMap(ss) } // newSparseMapPos returns a sparse map that can store at least up to n integers. func (f *Func) newSparseMapPos(n int) *sparseMapPos { - for i, scr := range f.Cache.scrSparseMapPos { - if scr != nil && scr.cap() >= n { - f.Cache.scrSparseMapPos[i] = nil - scr.clear() - return scr - } - } - return newSparseMapPos(n) + return f.Cache.allocSparseMapPos(n) } // retSparseMapPos returns a sparse map to the config's cache of sparse // sets to be reused by f.newSparseMapPos. 
func (f *Func) retSparseMapPos(ss *sparseMapPos) { - for i, scr := range f.Cache.scrSparseMapPos { - if scr == nil { - f.Cache.scrSparseMapPos[i] = ss - return - } - } - f.Cache.scrSparseMapPos = append(f.Cache.scrSparseMapPos, ss) + f.Cache.freeSparseMapPos(ss) } // newPoset returns a new poset from the internal cache @@ -190,33 +151,6 @@ func (f *Func) retPoset(po *poset) { f.Cache.scrPoset = append(f.Cache.scrPoset, po) } -// newDeadcodeLive returns a slice for the -// deadcode pass to use to indicate which values are live. -func (f *Func) newDeadcodeLive() []bool { - r := f.Cache.deadcode.live - f.Cache.deadcode.live = nil - return r -} - -// retDeadcodeLive returns a deadcode live value slice for re-use. -func (f *Func) retDeadcodeLive(live []bool) { - f.Cache.deadcode.live = live -} - -// newDeadcodeLiveOrderStmts returns a slice for the -// deadcode pass to use to indicate which values -// need special treatment for statement boundaries. -func (f *Func) newDeadcodeLiveOrderStmts() []*Value { - r := f.Cache.deadcode.liveOrderStmts - f.Cache.deadcode.liveOrderStmts = nil - return r -} - -// retDeadcodeLiveOrderStmts returns a deadcode liveOrderStmts slice for re-use. -func (f *Func) retDeadcodeLiveOrderStmts(liveOrderStmts []*Value) { - f.Cache.deadcode.liveOrderStmts = liveOrderStmts -} - func (f *Func) localSlotAddr(slot LocalSlot) *LocalSlot { a, ok := f.CanonicalLocalSlots[slot] if !ok { diff --git a/src/cmd/compile/internal/ssa/layout.go b/src/cmd/compile/internal/ssa/layout.go index 6abdb0d0c9..e4a8c6ffbf 100644 --- a/src/cmd/compile/internal/ssa/layout.go +++ b/src/cmd/compile/internal/ssa/layout.go @@ -20,9 +20,12 @@ func layoutRegallocOrder(f *Func) []*Block { func layoutOrder(f *Func) []*Block { order := make([]*Block, 0, f.NumBlocks()) - scheduled := make([]bool, f.NumBlocks()) - idToBlock := make([]*Block, f.NumBlocks()) - indegree := make([]int, f.NumBlocks()) + scheduled := f.Cache.allocBoolSlice(f.NumBlocks()) + defer f.Cache.freeBoolSlice(scheduled) + idToBlock := f.Cache.allocBlockSlice(f.NumBlocks()) + defer f.Cache.freeBlockSlice(idToBlock) + indegree := f.Cache.allocIntSlice(f.NumBlocks()) + defer f.Cache.freeIntSlice(indegree) posdegree := f.newSparseSet(f.NumBlocks()) // blocks with positive remaining degree defer f.retSparseSet(posdegree) // blocks with zero remaining degree. Use slice to simulate a LIFO queue to implement diff --git a/src/cmd/compile/internal/ssa/likelyadjust.go b/src/cmd/compile/internal/ssa/likelyadjust.go index f462bf29a6..1d0e53cf5b 100644 --- a/src/cmd/compile/internal/ssa/likelyadjust.go +++ b/src/cmd/compile/internal/ssa/likelyadjust.go @@ -117,8 +117,10 @@ func likelyadjust(f *Func) { // in their rank order. 0 is default, more positive // is less likely. It's possible to assign a negative // unlikeliness (though not currently the case). - certain := make([]int8, f.NumBlocks()) // In the long run, all outcomes are at least this bad. Mainly for Exit - local := make([]int8, f.NumBlocks()) // for our immediate predecessors. + certain := f.Cache.allocInt8Slice(f.NumBlocks()) // In the long run, all outcomes are at least this bad. Mainly for Exit + defer f.Cache.freeInt8Slice(certain) + local := f.Cache.allocInt8Slice(f.NumBlocks()) // for our immediate predecessors. 
+ defer f.Cache.freeInt8Slice(local) po := f.postorder() nest := f.loopnest() @@ -277,7 +279,8 @@ func loopnestfor(f *Func) *loopnest { sdom := f.Sdom() b2l := make([]*loop, f.NumBlocks()) loops := make([]*loop, 0) - visited := make([]bool, f.NumBlocks()) + visited := f.Cache.allocBoolSlice(f.NumBlocks()) + defer f.Cache.freeBoolSlice(visited) sawIrred := false if f.pass.debug > 2 { @@ -369,7 +372,8 @@ func loopnestfor(f *Func) *loopnest { ln := &loopnest{f: f, b2l: b2l, po: po, sdom: sdom, loops: loops, hasIrreducible: sawIrred} // Calculate containsUnavoidableCall for regalloc - dominatedByCall := make([]bool, f.NumBlocks()) + dominatedByCall := f.Cache.allocBoolSlice(f.NumBlocks()) + defer f.Cache.freeBoolSlice(dominatedByCall) for _, b := range po { if checkContainsCall(b) { dominatedByCall[b.ID] = true diff --git a/src/cmd/compile/internal/ssa/loopreschedchecks.go b/src/cmd/compile/internal/ssa/loopreschedchecks.go index 1326fa5ee8..5a4f0b23d6 100644 --- a/src/cmd/compile/internal/ssa/loopreschedchecks.go +++ b/src/cmd/compile/internal/ssa/loopreschedchecks.go @@ -94,7 +94,8 @@ func insertLoopReschedChecks(f *Func) { lastMems[f.Entry.ID] = f.Entry.NewValue0(f.Entry.Pos, OpInitMem, types.TypeMem) } - memDefsAtBlockEnds := make([]*Value, f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from earlier block. + memDefsAtBlockEnds := f.Cache.allocValueSlice(f.NumBlocks()) // For each block, the mem def seen at its bottom. Could be from earlier block. + defer f.Cache.freeValueSlice(memDefsAtBlockEnds) // Propagate last mem definitions forward through successor blocks. for i := len(po) - 1; i >= 0; i-- { @@ -404,7 +405,8 @@ outer: func findLastMems(f *Func) []*Value { var stores []*Value - lastMems := make([]*Value, f.NumBlocks()) + lastMems := f.Cache.allocValueSlice(f.NumBlocks()) + defer f.Cache.freeValueSlice(lastMems) storeUse := f.newSparseSet(f.NumValues()) defer f.retSparseSet(storeUse) for _, b := range f.Blocks { diff --git a/src/cmd/compile/internal/ssa/looprotate.go b/src/cmd/compile/internal/ssa/looprotate.go index 2eefda1c8b..844a8f7124 100644 --- a/src/cmd/compile/internal/ssa/looprotate.go +++ b/src/cmd/compile/internal/ssa/looprotate.go @@ -30,7 +30,8 @@ func loopRotate(f *Func) { return } - idToIdx := make([]int, f.NumBlocks()) + idToIdx := f.Cache.allocIntSlice(f.NumBlocks()) + defer f.Cache.freeIntSlice(idToIdx) for i, b := range f.Blocks { idToIdx[b.ID] = i } @@ -92,20 +93,21 @@ func loopRotate(f *Func) { // Some blocks that are not part of a loop may be placed // between loop blocks. In order to avoid these blocks from // being overwritten, use a temporary slice. 
- newOrder := make([]*Block, 0, f.NumBlocks()) - for _, b := range f.Blocks { + oldOrder := f.Cache.allocBlockSlice(len(f.Blocks)) + defer f.Cache.freeBlockSlice(oldOrder) + copy(oldOrder, f.Blocks) + for _, b := range oldOrder { if _, ok := move[b.ID]; ok { continue } - newOrder = append(newOrder, b) + f.Blocks[j] = b j++ for _, a := range after[b.ID] { - newOrder = append(newOrder, a) + f.Blocks[j] = a j++ } } - if j != len(f.Blocks) { + if j != len(oldOrder) { f.Fatalf("bad reordering in looprotate") } - f.Blocks = newOrder } diff --git a/src/cmd/compile/internal/ssa/nilcheck.go b/src/cmd/compile/internal/ssa/nilcheck.go index 5f23790c97..4f797a473f 100644 --- a/src/cmd/compile/internal/ssa/nilcheck.go +++ b/src/cmd/compile/internal/ssa/nilcheck.go @@ -41,7 +41,8 @@ func nilcheckelim(f *Func) { // map from value ID to bool indicating if value is known to be non-nil // in the current dominator path being walked. This slice is updated by // walkStates to maintain the known non-nil values. - nonNilValues := make([]bool, f.NumValues()) + nonNilValues := f.Cache.allocBoolSlice(f.NumValues()) + defer f.Cache.freeBoolSlice(nonNilValues) // make an initial pass identifying any non-nil values for _, b := range f.Blocks { @@ -86,7 +87,8 @@ func nilcheckelim(f *Func) { // allocate auxiliary date structures for computing store order sset := f.newSparseSet(f.NumValues()) defer f.retSparseSet(sset) - storeNumber := make([]int32, f.NumValues()) + storeNumber := f.Cache.allocInt32Slice(f.NumValues()) + defer f.Cache.freeInt32Slice(storeNumber) // perform a depth first walk of the dominee tree for len(work) > 0 { diff --git a/src/cmd/compile/internal/ssa/print.go b/src/cmd/compile/internal/ssa/print.go index 85ba6b72c6..0d3b5d9e34 100644 --- a/src/cmd/compile/internal/ssa/print.go +++ b/src/cmd/compile/internal/ssa/print.go @@ -124,7 +124,7 @@ func (p stringFuncPrinter) named(n LocalSlot, vals []*Value) { func fprintFunc(p funcPrinter, f *Func) { reachable, live := findlive(f) - defer f.retDeadcodeLive(live) + defer f.Cache.freeBoolSlice(live) p.header(f) printed := make([]bool, f.NumValues()) for _, b := range f.Blocks { diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index ea7117586a..a25688fbd1 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -146,6 +146,7 @@ func regalloc(f *Func) { var s regAllocState s.init(f) s.regalloc(f) + s.close() } type register uint8 @@ -357,6 +358,12 @@ func (s *regAllocState) clobberRegs(m regMask) { // setOrig records that c's original value is the same as // v's original value. func (s *regAllocState) setOrig(c *Value, v *Value) { + if int(c.ID) >= cap(s.orig) { + x := s.f.Cache.allocValueSlice(int(c.ID) + 1) + copy(x, s.orig) + s.f.Cache.freeValueSlice(s.orig) + s.orig = x + } for int(c.ID) >= len(s.orig) { s.orig = append(s.orig, nil) } @@ -664,7 +671,7 @@ func (s *regAllocState) init(f *Func) { s.f.Cache.regallocValues = make([]valState, nv) } s.values = s.f.Cache.regallocValues - s.orig = make([]*Value, nv) + s.orig = s.f.Cache.allocValueSlice(nv) s.copies = make(map[*Value]bool) for _, b := range s.visitOrder { for _, v := range b.Values { @@ -728,6 +735,10 @@ func (s *regAllocState) init(f *Func) { } } +func (s *regAllocState) close() { + s.f.Cache.freeValueSlice(s.orig) +} + // Adds a use record for id at distance dist from the start of the block. // All calls to addUse must happen with nonincreasing dist. 
func (s *regAllocState) addUse(id ID, dist int32, pos src.XPos) { diff --git a/src/cmd/compile/internal/ssa/schedule.go b/src/cmd/compile/internal/ssa/schedule.go index 62eaa2ed45..6e570aa82a 100644 --- a/src/cmd/compile/internal/ssa/schedule.go +++ b/src/cmd/compile/internal/ssa/schedule.go @@ -94,13 +94,15 @@ func (op Op) isLoweredGetClosurePtr() bool { func schedule(f *Func) { // For each value, the number of times it is used in the block // by values that have not been scheduled yet. - uses := make([]int32, f.NumValues()) + uses := f.Cache.allocInt32Slice(f.NumValues()) + defer f.Cache.freeInt32Slice(uses) // reusable priority queue priq := new(ValHeap) // "priority" for a value - score := make([]int8, f.NumValues()) + score := f.Cache.allocInt8Slice(f.NumValues()) + defer f.Cache.freeInt8Slice(score) // scheduling order. We queue values in this list in reverse order. // A constant bound allows this to be stack-allocated. 64 is @@ -108,7 +110,8 @@ func schedule(f *Func) { order := make([]*Value, 0, 64) // maps mem values to the next live memory value - nextMem := make([]*Value, f.NumValues()) + nextMem := f.Cache.allocValueSlice(f.NumValues()) + defer f.Cache.freeValueSlice(nextMem) // additional pretend arguments for each Value. Used to enforce load/store ordering. additionalArgs := make([][]*Value, f.NumValues()) diff --git a/src/cmd/compile/internal/ssa/stackalloc.go b/src/cmd/compile/internal/ssa/stackalloc.go index d41f3996af..3e24b48a69 100644 --- a/src/cmd/compile/internal/ssa/stackalloc.go +++ b/src/cmd/compile/internal/ssa/stackalloc.go @@ -25,8 +25,6 @@ type stackAllocState struct { values []stackValState interfere [][]ID // interfere[v.id] = values that interfere with v. names []LocalSlot - slots []int - used []bool nArgSlot, // Number of Values sourced to arg slot nNotNeed, // Number of Values not needing a stack slot @@ -57,12 +55,6 @@ func putStackAllocState(s *stackAllocState) { for i := range s.names { s.names[i] = LocalSlot{} } - for i := range s.slots { - s.slots[i] = 0 - } - for i := range s.used { - s.used[i] = false - } s.f.Cache.stackAllocState = s s.f = nil s.live = nil @@ -218,25 +210,15 @@ func (s *stackAllocState) stackalloc() { // Each time we assign a stack slot to a value v, we remember // the slot we used via an index into locations[v.Type]. - slots := s.slots - if n := f.NumValues(); cap(slots) >= n { - slots = slots[:n] - } else { - slots = make([]int, n) - s.slots = slots - } + slots := f.Cache.allocIntSlice(f.NumValues()) + defer f.Cache.freeIntSlice(slots) for i := range slots { slots[i] = -1 } // Pick a stack slot for each value needing one. - var used []bool - if n := f.NumValues(); cap(s.used) >= n { - used = s.used[:n] - } else { - used = make([]bool, n) - s.used = used - } + used := f.Cache.allocBoolSlice(f.NumValues()) + defer f.Cache.freeBoolSlice(used) for _, b := range f.Blocks { for _, v := range b.Values { if !s.values[v.ID].needSlot { diff --git a/src/cmd/compile/internal/ssa/tighten.go b/src/cmd/compile/internal/ssa/tighten.go index 214bf628bd..edae6a1cb7 100644 --- a/src/cmd/compile/internal/ssa/tighten.go +++ b/src/cmd/compile/internal/ssa/tighten.go @@ -10,7 +10,8 @@ package ssa // A Value can be moved to any block that // dominates all blocks in which it is used. 
func tighten(f *Func) { - canMove := make([]bool, f.NumValues()) + canMove := f.Cache.allocBoolSlice(f.NumValues()) + defer f.Cache.freeBoolSlice(canMove) for _, b := range f.Blocks { for _, v := range b.Values { if v.Op.isLoweredGetClosurePtr() { @@ -52,7 +53,8 @@ func tighten(f *Func) { lca := makeLCArange(f) // For each moveable value, record the block that dominates all uses found so far. - target := make([]*Block, f.NumValues()) + target := f.Cache.allocBlockSlice(f.NumValues()) + defer f.Cache.freeBlockSlice(target) // Grab loop information. // We use this to make sure we don't tighten a value into a (deeper) loop. diff --git a/src/cmd/compile/internal/ssa/writebarrier.go b/src/cmd/compile/internal/ssa/writebarrier.go index f5a7ed5928..2e7fc769df 100644 --- a/src/cmd/compile/internal/ssa/writebarrier.go +++ b/src/cmd/compile/internal/ssa/writebarrier.go @@ -139,7 +139,8 @@ func writebarrier(f *Func) { // allocate auxiliary data structures for computing store order sset = f.newSparseSet(f.NumValues()) defer f.retSparseSet(sset) - storeNumber = make([]int32, f.NumValues()) + storeNumber = f.Cache.allocInt32Slice(f.NumValues()) + defer f.Cache.freeInt32Slice(storeNumber) } // order values in store order -- 2.50.0
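
Editor's sketch (not part of the patch): the commit message describes pooling slices in per-size-class sync.Pools keyed by power-of-two capacity, so idle storage can still be collected by the GC. Below is a minimal, self-contained illustration of that pattern for []int only. The Cache type, the constants, and the function bodies are simplified stand-ins for the generated allocators.go shown above, not the exact generated output (the real generator also pools the *[]int headers and emits one alloc/free pair per element type).

```go
package main

import (
	"fmt"
	"math/bits"
	"sync"
)

// Cache is a stand-in for ssa.Cache; in the patch the alloc/free methods
// hang off the per-goroutine compilation cache.
type Cache struct{}

const minLogIntSlice = 5 // smallest pooled size class is 2^5 = 32 elements

// One sync.Pool per power-of-two size class (2^5 .. 2^31), so a GC can
// drop slices that are no longer being reused.
var poolFreeIntSlice [27]sync.Pool

// allocIntSlice returns a zeroed []int of length n, reusing pooled storage
// from the matching size class when possible.
func (c *Cache) allocIntSlice(n int) []int {
	n2 := n
	if n2 < 1<<minLogIntSlice {
		n2 = 1 << minLogIntSlice
	}
	b := bits.Len(uint(n2 - 1)) // round the request up to a power of two

	var s []int
	if v := poolFreeIntSlice[b-minLogIntSlice].Get(); v == nil {
		s = make([]int, 1<<b)
	} else {
		sp := v.(*[]int)
		s = *sp
		*sp = nil // drop the pooled header's reference to the array
	}
	return s[:n] // length is the requested size; capacity stays a power of two
}

// freeIntSlice zeroes the slice out to its full capacity and returns it to
// the pool for its size class, so reused slices always come back cleared.
func (c *Cache) freeIntSlice(s []int) {
	s = s[:cap(s)]
	for i := range s {
		s[i] = 0
	}
	b := bits.Len(uint(cap(s) - 1))
	poolFreeIntSlice[b-minLogIntSlice].Put(&s)
}

func main() {
	var c Cache
	x := c.allocIntSlice(100) // len 100, cap 128 (rounded up)
	fmt.Println(len(x), cap(x))
	c.freeIntSlice(x)

	y := c.allocIntSlice(120) // same size class, so x's backing array may be reused
	fmt.Println(len(y), cap(y))
	c.freeIntSlice(y)
}
```

This mirrors the usage pattern the patch introduces throughout the SSA passes: an `alloc*` call paired with a deferred `free*`, replacing per-function `make` calls so backing arrays survive across an entire compiler run instead of being reallocated for every function.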