type loop struct {
header *Block // The header node of this (reducible) loop
outer *loop // loop containing this loop
- // Next two fields not currently used, but cheap to maintain,
- // and aid in computation of inner-ness and list of blocks.
- nBlocks int32 // Number of blocks in this loop but not within inner loops
- isInner bool // True if never discovered to contain a loop
- containsCall bool // if any block in this loop or any loop it contains is a BlockCall or BlockDefer
+
+ // By default, children, exits, and depth are not initialized.
+ children []*loop // loops nested directly within this loop. Initialized by assembleChildren().
+ exits []*Block // exits records blocks reached by exits from this loop. Initialized by findExits().
+
+ // Loops aren't that common, so rather than force regalloc to keep
+ // a map or slice for its data, just put it here.
+ spills []*Value // regalloc scratch: values spilled in this loop, candidates for sinking
+ scratch int32 // regalloc scratch: number of this loop's blocks processed so far
+
+ // The next three fields are used by regalloc and/or
+ // aid in computing inner-ness and the list of blocks.
+ nBlocks int32 // Number of blocks in this loop but not within inner loops
+ depth int16 // Nesting depth of the loop; 1 is outermost. Initialized by calculateDepths().
+ isInner bool // True if never discovered to contain a loop
+
+ // Register allocation uses this.
+ containsCall bool // if any block in this loop or any loop it contains is a BlockCall or BlockDefer
}
// outerinner records that outer contains inner
po []*Block
sdom sparseTree
loops []*loop
+
+ // Record which of the lazily initialized fields have actually been initialized.
+ initializedChildren, initializedDepth, initializedExits bool
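+
+ // Callers that need exits may simply call findExits(); it runs
+ // calculateDepths() and assembleChildren() as needed, and each
+ // initializer is idempotent (a repeated call returns immediately).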
}
func min8(a, b int8) int8 {
innermost.nBlocks++
}
}
+
+ ln := &loopnest{f: f, b2l: b2l, po: po, sdom: sdom, loops: loops}
+
+ // Curious about the loopiness? "-d=ssa/likelyadjust/stats"
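+ // (a compiler debug flag, e.g. passed as -d=ssa/likelyadjust/stats to go tool compile)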
+ if f.pass.stats > 0 && len(loops) > 0 {
+ ln.assembleChildren()
+ ln.calculateDepths()
+ ln.findExits()
+
+ // Note stats for non-innermost loops are slightly flawed because
+ // they don't account for inner loop exits that span multiple levels.
+
+ for _, l := range loops {
+ x := len(l.exits)
+ cf := 0
+ if !l.containsCall {
+ cf = 1
+ }
+ inner := 0
+ if l.isInner {
+ inner++
+ }
+
+ f.logStat("loopstats:",
+ l.depth, "depth", x, "exits",
+ inner, "is_inner", cf, "is_callfree", l.nBlocks, "n_blocks")
+ }
+ }
+
if f.pass.debug > 1 && len(loops) > 0 {
fmt.Printf("Loops in %s:\n", f.Name)
for _, l := range loops {
}
fmt.Print("\n")
}
- return &loopnest{f, b2l, po, sdom, loops}
+ return ln
+}
+
+// assembleChildren initializes the children field of each
+// loop in the nest. Loop A is a child of loop B if A is
+// directly nested within B (based on the reducible-loops
+// detection above).
+func (ln *loopnest) assembleChildren() {
+ if ln.initializedChildren {
+ return
+ }
+ for _, l := range ln.loops {
+ if l.outer != nil {
+ l.outer.children = append(l.outer.children, l)
+ }
+ }
+ ln.initializedChildren = true
+}
+
+// calculateDepths uses the children field of loops
+// to determine the nesting depth (outer=1) of each
+// loop. This is helpful for finding exit edges.
+func (ln *loopnest) calculateDepths() {
+ if ln.initializedDepth {
+ return
+ }
+ ln.assembleChildren()
+ for _, l := range ln.loops {
+ if l.outer == nil {
+ l.setDepth(1)
+ }
+ }
+ ln.initializedDepth = true
+}
+
+// findExits uses loop depth information to find the
+// exits from a loop.
+func (ln *loopnest) findExits() {
+ if ln.initializedExits {
+ return
+ }
+ ln.calculateDepths()
+ b2l := ln.b2l
+ for _, b := range ln.po {
+ l := b2l[b.ID]
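+ // Only a block with two successors can be a loop exit: a block
+ // with a single successor outside the loop could not reach the
+ // loop header again, so it would not be in the loop at all.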
+ if l != nil && len(b.Succs) == 2 {
+ sl := b2l[b.Succs[0].ID]
+ if recordIfExit(l, sl, b.Succs[0]) {
+ continue
+ }
+ sl = b2l[b.Succs[1].ID]
+ if recordIfExit(l, sl, b.Succs[1]) {
+ continue
+ }
+ }
+ }
+ ln.initializedExits = true
+}
+
+// recordIfExit checks sl (the loop containing b) to see if it
+// is outside of loop l, and if so, records b as an exit block
+// from l and returns true.
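+// For example, if l has depth 2 and sl has depth 3, sl is walked
+// outward to depth 2; if that ancestor is a loop other than l, the
+// edge leaves l and b is recorded as an exit.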
+func recordIfExit(l, sl *loop, b *Block) bool {
+ if sl != l {
+ if sl == nil || sl.depth <= l.depth {
+ l.exits = append(l.exits, b)
+ return true
+ }
+ // sl is not nil and is deeper than l;
+ // this can happen with a goto into an irreducible loop made from gotos.
+ for sl.depth > l.depth {
+ sl = sl.outer
+ }
+ if sl != l {
+ l.exits = append(l.exits, b)
+ return true
+ }
+ }
+ return false
+}
+
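+// setDepth sets the depth of loop l to d, and recursively
+// sets the depth of each loop nested directly within l to d+1.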
+func (l *loop) setDepth(d int16) {
+ l.depth = d
+ for _, c := range l.children {
+ c.setDepth(d + 1)
+ }
}
// will have no use (so don't run deadcode after regalloc!).
// TODO: maybe we should introduce these extra phis?
+// Additional not-quite-SSA output occurs when spills are sunk out
+// of loops to the targets of exit edges from the loop. Before sinking,
+// there is one spill site (one StoreReg) targeting stack slot X; after
+// sinking there may be multiple spill sites targeting stack slot X,
+// with no phi functions at any join points reachable by the multiple
+// spill sites.
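+//
+// Schematically: if the spill of v is sunk to two exits E1 and E2 of
+// a loop, both StoreRegs target v's single stack slot, and a block
+// joining paths from E1 and E2 sees two definitions of that slot
+// with no phi between them.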
+
package ssa
import (
)
const (
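+ // debug levels; f.pass.debug is compared against these thresholds.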
- logSpills = iota
+ moveSpills = iota
+ logSpills
regDebug
stackDebug
)
uses *use // list of uses in this block
spill *Value // spilled copy of the Value
spillUsed bool
+ spillUsedShuffle bool // true if used in shuffling, after ordinary uses
needReg bool // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags()
rematerializeable bool // cached value of v.rematerializeable()
desired register // register we want value to be in, if any
loopnest *loopnest
}
+type spillToSink struct {
+ spill *Value // Spill instruction to move (a StoreReg)
+ dests int32 // Bitmask of exit blocks from the loop in which spill/val is defined; 1<<i set means val is live into loop.exits[i].
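+ // For example, dests == 5 (binary 101) means the value is live
+ // into loop.exits[0] and loop.exits[2].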
+}
+
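+// spilledValue returns the value being spilled, i.e. the
+// argument of the StoreReg.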
+func (sts *spillToSink) spilledValue() *Value {
+ return sts.spill.Args[0]
+}
+
type endReg struct {
r register
v *Value // pre-regalloc value held in this register (TODO: can we use ID here?)
return m &^ s.reserved()
}
+// loopForBlock returns the loop containing block b,
+// provided that the loop is "interesting" for purposes
+// of improving register allocation (= is inner, and does
+// not contain a call).
+func (s *regAllocState) loopForBlock(b *Block) *loop {
+ loop := s.loopnest.b2l[b.ID]
+
+ // Minor for-the-time-being optimization: nothing happens
+ // unless a loop is both inner and call-free, therefore
+ // don't bother with other loops.
+ if loop != nil && (loop.containsCall || !loop.isInner) {
+ loop = nil
+ }
+ return loop
+}
+
func (s *regAllocState) regalloc(f *Func) {
liveSet := f.newSparseSet(f.NumValues())
defer f.retSparseSet(liveSet)
var phiRegs []register
var args []*Value
+ // statistics
+ var nSpills int // # of spills remaining
+ var nSpillsInner int // # of spills remaining in inner loops
+ var nSpillsSunk int // # of sunk spills remaining
+ var nSpillsChanged int // # of sunk spills lost because of register use change
+ var nSpillsSunkUnused int // # of spills not sunk because they were removed completely
+ var nSpillsNotSunkLateUse int // # of spills not sunk because of very late use (in shuffle)
+
if f.Entry != f.Blocks[0] {
f.Fatalf("entry block must be first")
}
+ // Get loop nest so that spills in inner loops can be
+ // tracked. When the last block of a loop is processed,
+ // attempt to move spills out of the loop.
+ s.loopnest.findExits()
+
+ // Spills are moved from one block's slice of values to another's.
+ // Moving a spill before register allocation is complete would
+ // confuse the allocator, so candidates are recorded, then rechecked
+ // and moved after all allocation (register and stack) is complete.
+ // Because movement is only within a stack slot's lifetime, it
+ // is safe to do this.
+ var toSink []spillToSink
+ // Will be used to figure out live inputs to exit blocks of inner loops.
+ entryCandidates := newSparseMap(f.NumValues())
+
for _, b := range f.Blocks {
s.curBlock = b
+ loop := s.loopForBlock(b)
// Initialize liveSet and uses fields for this block.
// Walk backwards through the block doing liveness analysis.
s.setOrig(spill, v)
s.values[v.ID].spill = spill
s.values[v.ID].spillUsed = false
+ if loop != nil {
+ loop.spills = append(loop.spills, v)
+ nSpillsInner++
+ }
+ nSpills++
}
// Save the starting state for use by merge edges.
s.setOrig(spill, v)
s.values[v.ID].spill = spill
s.values[v.ID].spillUsed = false
+ if loop != nil {
+ loop.spills = append(loop.spills, v)
+ nSpillsInner++
+ }
+ nSpills++
}
}
s.values[e.ID].spillUsed = true
}
+ // Keep track of values that are spilled in the loop, but whose spill
+ // is not used in the loop. It may be possible to move ("sink") the
+ // spill out of the loop into one or more exit blocks.
+ if loop != nil {
+ loop.scratch++ // increment count of blocks in this loop that have been processed
+ if loop.scratch == loop.nBlocks { // just processed last block of loop, if it is an inner loop.
+ // This check is redundant with code at the top of the loop.
+ // This is definitive; the one at the top of the loop is an optimization.
+ if loop.isInner && // Common case, easier, most likely to be profitable
+ !loop.containsCall && // Calls force spills, also lead to puzzling spill info.
+ len(loop.exits) <= 32 { // Almost no inner loops have more than 32 exits,
+ // and this allows use of a bitvector and a sparseMap.
+
+ // TODO: exit calculation is messed up for non-inner loops
+ // because of multilevel exits that are not part of the "exit"
+ // count.
+
+ // Compute the set of spill-movement candidates live at entry to exit blocks.
+ // isLoopSpillCandidate filters for
+ // (1) defined in appropriate loop
+ // (2) needs a register
+ // (3) spill not already used (in the loop)
+ // Condition (3) === "in a register at all loop exits"
+
+ entryCandidates.clear()
+
+ for whichExit, ss := range loop.exits {
+ // Start with live at end.
+ for _, li := range s.live[ss.ID] {
+ if s.isLoopSpillCandidate(loop, s.orig[li.ID]) {
+ entryCandidates.setBit(li.ID, uint(whichExit))
+ }
+ }
+ // Control can also be live.
+ if ss.Control != nil && s.isLoopSpillCandidate(loop, ss.Control) {
+ entryCandidates.setBit(ss.Control.ID, uint(whichExit))
+ }
+ // Walk backwards, filling in locally live values, removing those defined.
+ for i := len(ss.Values) - 1; i >= 0; i-- {
+ v := ss.Values[i]
+ entryCandidates.remove(v.ID) // v is defined here, outside the loop, so it cannot be a candidate; removing it just keeps the set smaller.
+ for _, a := range v.Args {
+ if s.isLoopSpillCandidate(loop, a) {
+ entryCandidates.setBit(a.ID, uint(whichExit))
+ }
+ }
+ }
+ }
+
+ for _, e := range loop.spills {
+ whichblocks := entryCandidates.get(e.ID)
+ oldSpill := s.values[e.ID].spill
+ if whichblocks != 0 && whichblocks != -1 { // -1 = not in map.
+ toSink = append(toSink, spillToSink{spill: oldSpill, dests: whichblocks})
+ }
+ }
+
+ } // loop is inner etc
+ loop.scratch = 0 // Don't leave a mess, just in case.
+ loop.spills = nil
+ } // if scratch == nBlocks
+ } // if loop is not nil
+
// Clear any final uses.
// All that is left should be the pseudo-uses added for values which
// are live at the end of b.
// Constants, SP, SB, ...
continue
}
+ loop := s.loopForBlock(spill.Block)
+ if loop != nil {
+ nSpillsInner--
+ }
+
spill.Args[0].Uses--
f.freeValue(spill)
+ nSpills--
}
+
for _, b := range f.Blocks {
i := 0
for _, v := range b.Values {
// Not important now because this is the last phase that manipulates Values
}
+ // Must clear these out before any potential recycling, though that's
+ // not currently implemented.
+ for i, ts := range toSink {
+ vsp := ts.spill
+ if vsp.Op == OpInvalid { // This spill was completely eliminated
+ toSink[i].spill = nil
+ }
+ }
+
// Anything that didn't get a register gets a stack location here.
// (StoreReg, stack-based phis, inputs, ...)
stacklive := stackalloc(s.f, s.spillLive)
// Fix up all merge edges.
s.shuffle(stacklive)
+
+ // Insert moved spills (that have not been marked invalid above)
+ // at start of appropriate block and remove the originals from their
+ // location within loops. Notice that this can break SSA form;
+ // if a spill is sunk to multiple exits, there will be no phi for that
+ // spill at a join point downstream of those two exits, though the
+ // two spills will target the same stack slot. Notice also that this
+ // takes place after stack allocation, so the stack allocator does
+ // not need to process these malformed flow graphs.
+sinking:
+ for _, ts := range toSink {
+ vsp := ts.spill
+ if vsp == nil { // This spill was completely eliminated
+ nSpillsSunkUnused++
+ continue sinking
+ }
+ e := ts.spilledValue()
+ if s.values[e.ID].spillUsedShuffle {
+ nSpillsNotSunkLateUse++
+ continue sinking
+ }
+
+ // Move spills to a better (outside-of-loop) block.
+ // This would be costly if it occurred very often, but it doesn't.
+ b := vsp.Block
+ loop := s.loopnest.b2l[b.ID]
+ dests := ts.dests
+
+ // Pre-check to be sure that spilled value is still in expected register on all exits where live.
+ check_val_still_in_reg:
+ for i := uint(0); i < 32 && dests != 0; i++ {
+
+ if dests&(1<<i) == 0 {
+ continue
+ }
+ dests ^= 1 << i
+ d := loop.exits[i]
+ if len(d.Preds) > 1 {
+ panic("Should be impossible given critical edges removed")
+ }
+ p := d.Preds[0] // block in loop exiting to d.
+
+ endregs := s.endRegs[p.ID]
+ for _, regrec := range endregs {
+ if regrec.v == e && regrec.r != noRegister && regrec.c == e { // TODO: regrec.c != e implies different spill possible.
+ continue check_val_still_in_reg
+ }
+ }
+ // If here, the register assignment was lost down at least one exit and it can't be sunk
+ if s.f.pass.debug > moveSpills {
+ s.f.Config.Warnl(e.Line, "lost register assignment for spill %v in %v at exit %v to %v",
+ vsp, b, p, d)
+ }
+ nSpillsChanged++
+ continue sinking
+ }
+
+ nSpillsSunk++
+ nSpillsInner--
+ // Don't update nSpills: the spill is merely moved, and even if it is duplicated, the number of spills along any one path does not increase.
+
+ dests = ts.dests
+
+ // remove vsp from b.Values
+ i := 0
+ for _, w := range b.Values {
+ if vsp == w {
+ continue
+ }
+ b.Values[i] = w
+ i++
+ }
+ b.Values = b.Values[:i]
+
+ for i := uint(0); i < 32 && dests != 0; i++ {
+
+ if dests&(1<<i) == 0 {
+ continue
+ }
+
+ dests ^= 1 << i
+
+ d := loop.exits[i]
+ vspnew := d.NewValue1(e.Line, OpStoreReg, e.Type, e)
+
+ if s.f.pass.debug > moveSpills {
+ s.f.Config.Warnl(e.Line, "moved spill %v in %v for %v to %v in %v",
+ vsp, b, e, vspnew, d)
+ }
+
+ f.setHome(vspnew, f.getHome(vsp.ID)) // copy stack home
+
+ // Shuffle vspnew to the beginning of its block: NewValue1 appended
+ // it at the end, but the spill must execute while e is still in its
+ // register, before anything else in d can clobber it.
+ copy(d.Values[1:], d.Values[0:len(d.Values)-1])
+ d.Values[0] = vspnew
+ }
+ }
+
+ if f.pass.stats > 0 {
+ f.logStat("spills_info",
+ nSpills, "spills", nSpillsInner, "inner_spills_remaining", nSpillsSunk, "inner_spills_sunk", nSpillsSunkUnused, "inner_spills_unused", nSpillsNotSunkLateUse, "inner_spills_shuffled", nSpillsChanged, "inner_spills_changed")
+ }
+}
+
+// isLoopSpillCandidate indicates whether the spill for v satisfies preliminary
+// spill-sinking conditions just after the last block of loop has been processed.
+// In particular:
+// v needs a register.
+// v's spill is not (YET) used.
+// v's definition is within loop.
+// The spill may be used in the future, either by an outright use
+// in the code, or by shuffling code inserted after stack allocation.
+// Outright uses cause sinking; shuffling (within the loop) inhibits it.
+func (s *regAllocState) isLoopSpillCandidate(loop *loop, v *Value) bool {
+ return s.values[v.ID].needReg && !s.values[v.ID].spillUsed && s.loopnest.b2l[v.Block.ID] == loop
+}
+
+// lateSpillUse notes a late (after stack allocation) use of spill c.
+// This will inhibit spill sinking.
+func (s *regAllocState) lateSpillUse(c *Value) {
+ // TODO investigate why this is necessary.
+ // It appears that an outside-the-loop use of
+ // an otherwise sinkable spill makes the spill
+ // a candidate for shuffling, when it would not
+ // otherwise have been the case (spillUsed was not
+ // true when isLoopSpillCandidate was called, yet
+ // it was shuffled). Such shuffling cuts the amount
+ // of spill sinking by more than half (in make.bash).
+ v := s.orig[c.ID]
+ if v != nil {
+ s.values[v.ID].spillUsedShuffle = true
+ }
}
// shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
if _, isReg := loc.(*Register); isReg {
c = e.p.NewValue1(c.Line, OpCopy, c.Type, c)
} else {
+ e.s.lateSpillUse(c)
c = e.p.NewValue1(c.Line, OpLoadReg, c.Type, c)
}
e.set(r, vid, c, false)
}
} else {
if dstReg {
+ e.s.lateSpillUse(c)
x = e.p.NewValue1(c.Line, OpLoadReg, c.Type, c)
} else {
// mem->mem. Use temp register.
e.erase(loc)
r := e.findRegFor(c.Type)
+ e.s.lateSpillUse(c)
t := e.p.NewValue1(c.Line, OpLoadReg, c.Type, c)
e.set(r, vid, t, false)
x = e.p.NewValue1(c.Line, OpStoreReg, loc.(LocalSlot).Type, t)