"cmd/compile/internal/types"
"cmd/internal/src"
"cmd/internal/sys"
+ "cmp"
"fmt"
"internal/buildcfg"
"math"
"math/bits"
+ "slices"
"unsafe"
)
// Initialize regValLiveSet and uses fields for this block.
// Walk backwards through the block doing liveness analysis.
regValLiveSet.clear()
- for _, e := range s.live[b.ID] {
- s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
- regValLiveSet.add(e.ID)
+ if s.live != nil {
+ for _, e := range s.live[b.ID] {
+ s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
+ regValLiveSet.add(e.ID)
+ }
}
for _, v := range b.ControlValues() {
if s.values[v.ID].needReg {
}
// Load static desired register info at the end of the block.
- desired.copy(&s.desired[b.ID])
+ if s.desired != nil {
+ desired.copy(&s.desired[b.ID])
+ }
// Check actual assigned registers at the start of the next block(s).
// Dynamically assigned registers will trump the static
}
}
// Walk values backwards computing desired register info.
- // See computeLive for more comments.
+ // See computeDesired for more comments.
for i := len(oldSched) - 1; i >= 0; i-- {
v := oldSched[i]
prefs := desired.remove(v.ID)
if checkEnabled {
regValLiveSet.clear()
- for _, x := range s.live[b.ID] {
- regValLiveSet.add(x.ID)
+ if s.live != nil {
+ for _, x := range s.live[b.ID] {
+ regValLiveSet.add(x.ID)
+ }
}
for r := register(0); r < s.numRegs; r++ {
v := s.regs[r].v
// isn't in a register, generate a use for the spill location.
// We need to remember this information so that
// the liveness analysis in stackalloc is correct.
- for _, e := range s.live[b.ID] {
- vi := &s.values[e.ID]
- if vi.regs != 0 {
- // in a register, we'll use that source for the merge.
- continue
- }
- if vi.rematerializeable {
- // we'll rematerialize during the merge.
- continue
- }
- if s.f.pass.debug > regDebug {
- fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
+ if s.live != nil {
+ for _, e := range s.live[b.ID] {
+ vi := &s.values[e.ID]
+ if vi.regs != 0 {
+ // in a register, we'll use that source for the merge.
+ continue
+ }
+ if vi.rematerializeable {
+ // we'll rematerialize during the merge.
+ continue
+ }
+ if s.f.pass.debug > regDebug {
+ fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
+ }
+ spill := s.makeSpill(s.orig[e.ID], b)
+ s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
}
- spill := s.makeSpill(s.orig[e.ID], b)
- s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
- }
- // Clear any final uses.
- // All that is left should be the pseudo-uses added for values which
- // are live at the end of b.
- for _, e := range s.live[b.ID] {
- u := s.values[e.ID].uses
- if u == nil {
- f.Fatalf("live at end, no uses v%d", e.ID)
- }
- if u.next != nil {
- f.Fatalf("live at end, too many uses v%d", e.ID)
+ // Clear any final uses.
+ // All that is left should be the pseudo-uses added for values which
+ // are live at the end of b.
+ for _, e := range s.live[b.ID] {
+ u := s.values[e.ID].uses
+ if u == nil {
+ f.Fatalf("live at end, no uses v%d", e.ID)
+ }
+ if u.next != nil {
+ f.Fatalf("live at end, too many uses v%d", e.ID)
+ }
+ s.values[e.ID].uses = nil
+ u.next = s.freeUseRecords
+ s.freeUseRecords = u
}
- s.values[e.ID].uses = nil
- u.next = s.freeUseRecords
- s.freeUseRecords = u
}
// allocReg may have dropped registers from startRegsMask that
best := v.Block
bestArg := v
var bestDepth int16
- if l := s.loopnest.b2l[best.ID]; l != nil {
- bestDepth = l.depth
+ if s.loopnest != nil && s.loopnest.b2l[best.ID] != nil {
+ bestDepth = s.loopnest.b2l[best.ID].depth
}
b := best
const maxSpillSearch = 100
}
var depth int16
- if l := s.loopnest.b2l[b.ID]; l != nil {
- depth = l.depth
+ if s.loopnest != nil && s.loopnest.b2l[b.ID] != nil {
+ depth = s.loopnest.b2l[b.ID].depth
}
if depth > bestDepth {
// Don't push the spill into a deeper loop.
// computeLive computes a map from block ID to a list of value IDs live at the end
// of that block. Together with the value ID is a count of how many instructions
// to the next use of that value. The resulting map is stored in s.live.
-// computeLive also computes the desired register information at the end of each block.
-// This desired register information is stored in s.desired.
-// TODO: this could be quadratic if lots of variables are live across lots of
-// basic blocks. Figure out a way to make this function (or, more precisely, the user
-// of this function) require only linear size & time.
func (s *regAllocState) computeLive() {
f := s.f
+ // single block functions do not have variables that are live across
+ // branches
+ if len(f.Blocks) == 1 {
+ return
+ }
+ po := f.postorder()
s.live = make([][]liveInfo, f.NumBlocks())
s.desired = make([]desiredState, f.NumBlocks())
- var phis []*Value
+ s.loopnest = f.loopnest()
+
rematIDs := make([]ID, 0, 64)
live := f.newSparseMapPos(f.NumValues())
t := f.newSparseMapPos(f.NumValues())
defer f.retSparseMapPos(t)
- // Keep track of which value we want in each register.
- var desired desiredState
-
- // Instead of iterating over f.Blocks, iterate over their postordering.
- // Liveness information flows backward, so starting at the end
- // increases the probability that we will stabilize quickly.
- // TODO: Do a better job yet. Here's one possibility:
- // Calculate the dominator tree and locate all strongly connected components.
- // If a value is live in one block of an SCC, it is live in all.
- // Walk the dominator tree from end to beginning, just once, treating SCC
- // components as single blocks, duplicated calculated liveness information
- // out to all of them.
- po := f.postorder()
- s.loopnest = f.loopnest()
s.loopnest.computeUnavoidableCalls()
+
+ // Liveness analysis.
+ // This is an adapted version of the algorithm described in chapter 2.4.2
+ // of Fabrice Rastello's On Sparse Intermediate Representations.
+ // https://web.archive.org/web/20240417212122if_/https://inria.hal.science/hal-00761555/file/habilitation.pdf#section.50
+ //
+ // For our implementation, we fall back to a traditional iterative algorithm when we encounter
+ // Irreducible CFGs. They are very uncommon in Go code because they need to be constructed with
+ // gotos and our current loopnest definition does not compute all the information that
+ // we'd need to compute the loop ancestors for that step of the algorithm.
+ //
+ // Additionally, instead of only considering non-loop successors in the initial DFS phase,
+ // we compute the liveout as the union of all successors. This larger liveout set is a subset
+ // of the final liveout for the block and adding this information in the DFS phase means that
+ // we get slightly more accurate distance information.
+ var loopLiveIn map[*loop][]liveInfo
+ var numCalls []int32
+ if len(s.loopnest.loops) > 0 && !s.loopnest.hasIrreducible {
+ loopLiveIn = make(map[*loop][]liveInfo)
+ numCalls = f.Cache.allocInt32Slice(f.NumBlocks())
+ defer f.Cache.freeInt32Slice(numCalls)
+ }
+
for {
changed := false
for _, b := range po {
// Start with known live values at the end of the block.
- // Add len(b.Values) to adjust from end-of-block distance
- // to beginning-of-block distance.
live.clear()
for _, e := range s.live[b.ID] {
- live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
+ live.set(e.ID, e.dist, e.pos)
+ }
+ update := false
+ // arguments to phi nodes are live at this blocks out
+ for _, e := range b.Succs {
+ succ := e.b
+ delta := branchDistance(b, succ)
+ for _, v := range succ.Values {
+ if v.Op != OpPhi {
+ break
+ }
+ arg := v.Args[e.i]
+ if s.values[arg.ID].needReg && (!live.contains(arg.ID) || delta < live.get(arg.ID)) {
+ live.set(arg.ID, delta, v.Pos)
+ update = true
+ }
+ }
+ }
+ if update {
+ s.live[b.ID] = updateLive(live, s.live[b.ID])
+ }
+ // Add len(b.Values) to adjust from end-of-block distance
+ // to beginning-of-block distance.
+ c := live.contents()
+ for i := range c {
+ c[i].val += int32(len(b.Values))
}
// Mark control values as live
}
}
- // Propagate backwards to the start of the block
- // Assumes Values have been scheduled.
- phis = phis[:0]
for i := len(b.Values) - 1; i >= 0; i-- {
v := b.Values[i]
live.remove(v.ID)
if v.Op == OpPhi {
- // save phi ops for later
- phis = append(phis, v)
continue
}
if opcodeTable[v.Op].call {
+ if numCalls != nil {
+ numCalls[b.ID]++
+ }
rematIDs = rematIDs[:0]
c := live.contents()
for i := range c {
}
}
}
- // Propagate desired registers backwards.
+ // This is a loop header, save our live-in so that
+ // we can use it to fill in the loop bodies later
+ if loopLiveIn != nil {
+ loop := s.loopnest.b2l[b.ID]
+ if loop != nil && loop.header.ID == b.ID {
+ loopLiveIn[loop] = updateLive(live, nil)
+ }
+ }
+ // For each predecessor of b, expand its list of live-at-end values.
+ // invariant: live contains the values live at the start of b
+ for _, e := range b.Preds {
+ p := e.b
+ delta := branchDistance(p, b)
+
+ // Start t off with the previously known live values at the end of p.
+ t.clear()
+ for _, e := range s.live[p.ID] {
+ t.set(e.ID, e.dist, e.pos)
+ }
+ update := false
+
+ // Add new live values from scanning this block.
+ for _, e := range live.contents() {
+ d := e.val + delta
+ if !t.contains(e.key) || d < t.get(e.key) {
+ update = true
+ t.set(e.key, d, e.pos)
+ }
+ }
+
+ if !update {
+ continue
+ }
+ s.live[p.ID] = updateLive(t, s.live[p.ID])
+ changed = true
+ }
+ }
+
+ // Doing a traditional iterative algorithm and have run
+ // out of changes
+ if !changed {
+ break
+ }
+
+ // Doing a pre-pass and will fill in the liveness information
+ // later
+ if loopLiveIn != nil {
+ break
+ }
+ // For loopless code, we have full liveness info after a single
+ // iteration
+ if len(s.loopnest.loops) == 0 {
+ break
+ }
+ }
+ if f.pass.debug > regDebug {
+ s.debugPrintLive("after dfs walk", f, s.live, s.desired)
+ }
+
+ // irreducible CFGs and functions without loops are already
+ // done, compute their desired registers and return
+ if loopLiveIn == nil {
+ s.computeDesired()
+ return
+ }
+
+ // Walk the loopnest from outer to inner, adding
+ // all live-in values from their parent. Instead of
+ // a recursive algorithm, iterate in depth order.
+ // TODO(dmo): can we permute the loopnest? can we avoid this copy?
+ loops := slices.Clone(s.loopnest.loops)
+ slices.SortFunc(loops, func(a, b *loop) int {
+ return cmp.Compare(a.depth, b.depth)
+ })
+
+ loopset := f.newSparseMapPos(f.NumValues())
+ defer f.retSparseMapPos(loopset)
+ for _, loop := range loops {
+ if loop.outer == nil {
+ continue
+ }
+ livein := loopLiveIn[loop]
+ loopset.clear()
+ for _, l := range livein {
+ loopset.set(l.ID, l.dist, l.pos)
+ }
+ update := false
+ for _, l := range loopLiveIn[loop.outer] {
+ if !loopset.contains(l.ID) {
+ loopset.set(l.ID, l.dist, l.pos)
+ update = true
+ }
+ }
+ if update {
+ loopLiveIn[loop] = updateLive(loopset, livein)
+ }
+ }
+ // unknownDistance is a sentinel value for when we know a variable
+ // is live at any given block, but we do not yet know how far until it's next
+ // use. The distance will be computed later.
+ const unknownDistance = -1
+
+ // add live-in values of the loop headers to their children.
+ // This includes the loop headers themselves, since they can have values
+ // that die in the middle of the block and aren't live-out
+ for _, b := range po {
+ loop := s.loopnest.b2l[b.ID]
+ if loop == nil {
+ continue
+ }
+ headerLive := loopLiveIn[loop]
+ loopset.clear()
+ for _, l := range s.live[b.ID] {
+ loopset.set(l.ID, l.dist, l.pos)
+ }
+ update := false
+ for _, l := range headerLive {
+ if !loopset.contains(l.ID) {
+ loopset.set(l.ID, unknownDistance, src.NoXPos)
+ update = true
+ }
+ }
+ if update {
+ s.live[b.ID] = updateLive(loopset, s.live[b.ID])
+ }
+ }
+ if f.pass.debug > regDebug {
+ s.debugPrintLive("after live loop prop", f, s.live, s.desired)
+ }
+ // Filling in liveness from loops leaves some blocks with no distance information
+ // Run over them and fill in the information from their successors.
+ // To stabilize faster, we quit when no block has missing values and we only
+ // look at blocks that still have missing values in subsequent iterations
+ unfinishedBlocks := f.Cache.allocBlockSlice(len(po))
+ defer f.Cache.freeBlockSlice(unfinishedBlocks)
+ copy(unfinishedBlocks, po)
+
+ for len(unfinishedBlocks) > 0 {
+ n := 0
+ for _, b := range unfinishedBlocks {
+ live.clear()
+ unfinishedValues := 0
+ for _, l := range s.live[b.ID] {
+ if l.dist == unknownDistance {
+ unfinishedValues++
+ }
+ live.set(l.ID, l.dist, l.pos)
+ }
+ update := false
+ for _, e := range b.Succs {
+ succ := e.b
+ for _, l := range s.live[succ.ID] {
+ if !live.contains(l.ID) || l.dist == unknownDistance {
+ continue
+ }
+ dist := int32(len(succ.Values)) + l.dist + branchDistance(b, succ)
+ dist += numCalls[succ.ID] * unlikelyDistance
+ val := live.get(l.ID)
+ switch {
+ case val == unknownDistance:
+ unfinishedValues--
+ fallthrough
+ case dist < val:
+ update = true
+ live.set(l.ID, dist, l.pos)
+ }
+ }
+ }
+ if update {
+ s.live[b.ID] = updateLive(live, s.live[b.ID])
+ }
+ if unfinishedValues > 0 {
+ unfinishedBlocks[n] = b
+ n++
+ }
+ }
+ unfinishedBlocks = unfinishedBlocks[:n]
+ }
+
+ s.computeDesired()
+
+ if f.pass.debug > regDebug {
+ s.debugPrintLive("final", f, s.live, s.desired)
+ }
+}
+
+// computeDesired computes the desired register information at the end of each block.
+// It is essentially a liveness analysis on machine registers instead of SSA values
+// The desired register information is stored in s.desired.
+func (s *regAllocState) computeDesired() {
+
+ // TODO: Can we speed this up using the liveness information we have already
+ // from computeLive?
+ // TODO: Since we don't propagate information through phi nodes, can we do
+ // this as a single dominator tree walk instead of the iterative solution?
+ var desired desiredState
+ f := s.f
+ po := f.postorder()
+ for {
+ changed := false
+ for _, b := range po {
desired.copy(&s.desired[b.ID])
for i := len(b.Values) - 1; i >= 0; i-- {
v := b.Values[i]
desired.addList(v.Args[0].ID, prefs)
}
}
-
- // For each predecessor of b, expand its list of live-at-end values.
- // invariant: live contains the values live at the start of b (excluding phi inputs)
- for i, e := range b.Preds {
+ for _, e := range b.Preds {
p := e.b
- // Compute additional distance for the edge.
- // Note: delta must be at least 1 to distinguish the control
- // value use from the first user in a successor block.
- delta := int32(normalDistance)
- if len(p.Succs) == 2 {
- if p.Succs[0].b == b && p.Likely == BranchLikely ||
- p.Succs[1].b == b && p.Likely == BranchUnlikely {
- delta = likelyDistance
- }
- if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
- p.Succs[1].b == b && p.Likely == BranchLikely {
- delta = unlikelyDistance
- }
- }
+ changed = s.desired[p.ID].merge(&desired) || changed
+ }
+ }
+ if !changed || (!s.loopnest.hasIrreducible && len(s.loopnest.loops) == 0) {
+ break
+ }
+ }
+}
- // Update any desired registers at the end of p.
- s.desired[p.ID].merge(&desired)
+// updateLive updates a given liveInfo slice with the contents of t
+func updateLive(t *sparseMapPos, live []liveInfo) []liveInfo {
+ live = live[:0]
+ if cap(live) < t.size() {
+ live = make([]liveInfo, 0, t.size())
+ }
+ for _, e := range t.contents() {
+ live = append(live, liveInfo{e.key, e.val, e.pos})
+ }
+ return live
+}
- // Start t off with the previously known live values at the end of p.
- t.clear()
- for _, e := range s.live[p.ID] {
- t.set(e.ID, e.dist, e.pos)
- }
- update := false
+// branchDistance calculates the distance between a block and a
+// successor in pseudo-instructions. This is used to indicate
+// likeliness
+func branchDistance(b *Block, s *Block) int32 {
+ if len(b.Succs) == 2 {
+ if b.Succs[0].b == s && b.Likely == BranchLikely ||
+ b.Succs[1].b == s && b.Likely == BranchUnlikely {
+ return likelyDistance
+ }
+ if b.Succs[0].b == s && b.Likely == BranchUnlikely ||
+ b.Succs[1].b == s && b.Likely == BranchLikely {
+ return unlikelyDistance
+ }
+ }
+ // Note: the branch distance must be at least 1 to distinguish the control
+ // value use from the first user in a successor block.
+ return normalDistance
+}
- // Add new live values from scanning this block.
- for _, e := range live.contents() {
- d := e.val + delta
- if !t.contains(e.key) || d < t.get(e.key) {
- update = true
- t.set(e.key, d, e.pos)
- }
- }
- // Also add the correct arg from the saved phi values.
- // All phis are at distance delta (we consider them
- // simultaneously happening at the start of the block).
- for _, v := range phis {
- id := v.Args[i].ID
- if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
- update = true
- t.set(id, delta, v.Pos)
- }
- }
+func (s *regAllocState) debugPrintLive(stage string, f *Func, live [][]liveInfo, desired []desiredState) {
+ fmt.Printf("%s: live values at end of each block: %s\n", stage, f.Name)
+ for _, b := range f.Blocks {
+ s.debugPrintLiveBlock(b, live[b.ID], &desired[b.ID])
+ }
+}
- if !update {
+func (s *regAllocState) debugPrintLiveBlock(b *Block, live []liveInfo, desired *desiredState) {
+ fmt.Printf(" %s:", b)
+ slices.SortFunc(live, func(a, b liveInfo) int {
+ return cmp.Compare(a.ID, b.ID)
+ })
+ for _, x := range live {
+ fmt.Printf(" v%d(%d)", x.ID, x.dist)
+ for _, e := range desired.entries {
+ if e.ID != x.ID {
+ continue
+ }
+ fmt.Printf("[")
+ first := true
+ for _, r := range e.regs {
+ if r == noRegister {
continue
}
- // The live set has changed, update it.
- l := s.live[p.ID][:0]
- if cap(l) < t.size() {
- l = make([]liveInfo, 0, t.size())
+ if !first {
+ fmt.Printf(",")
}
- for _, e := range t.contents() {
- l = append(l, liveInfo{e.key, e.val, e.pos})
- }
- s.live[p.ID] = l
- changed = true
+ fmt.Print(&s.registers[r])
+ first = false
}
- }
-
- if !changed {
- break
+ fmt.Printf("]")
}
}
- if f.pass.debug > regDebug {
- fmt.Println("live values at end of each block")
- for _, b := range f.Blocks {
- fmt.Printf(" %s:", b)
- for _, x := range s.live[b.ID] {
- fmt.Printf(" v%d(%d)", x.ID, x.dist)
- for _, e := range s.desired[b.ID].entries {
- if e.ID != x.ID {
- continue
- }
- fmt.Printf("[")
- first := true
- for _, r := range e.regs {
- if r == noRegister {
- continue
- }
- if !first {
- fmt.Printf(",")
- }
- fmt.Print(&s.registers[r])
- first = false
- }
- fmt.Printf("]")
- }
- }
- if avoid := s.desired[b.ID].avoid; avoid != 0 {
- fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
- }
- fmt.Println()
- }
+ if avoid := desired.avoid; avoid != 0 {
+ fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
}
+ fmt.Println()
}
// A desiredState represents desired register assignments.
return [4]register{noRegister, noRegister, noRegister, noRegister}
}
-// merge merges another desired state x into d.
-func (d *desiredState) merge(x *desiredState) {
+// merge merges another desired state x into d. Returns whether the set has
+// changed
+func (d *desiredState) merge(x *desiredState) bool {
+ oldAvoid := d.avoid
d.avoid |= x.avoid
// There should only be a few desired registers, so
// linear insert is ok.
for _, e := range x.entries {
d.addList(e.ID, e.regs)
}
+ return oldAvoid != d.avoid
}
// computeUnavoidableCalls computes the containsUnavoidableCall fields in the loop nest.