cmd/compile: faster liveness analysis in regalloc

author Daniel Morsing <daniel.morsing@gmail.com>

Sat, 9 Aug 2025 19:30:30 +0000 (20:30 +0100)

committer Gopher Robot <gobot@golang.org>

Tue, 4 Nov 2025 21:57:19 +0000 (13:57 -0800)
author Daniel Morsing <daniel.morsing@gmail.com>
Sat, 9 Aug 2025 19:30:30 +0000 (20:30 +0100)
committer Gopher Robot <gobot@golang.org>
Tue, 4 Nov 2025 21:57:19 +0000 (13:57 -0800)
diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go

index 0009de4fa69608b2b7b4942adab6d11a71056c41..b5174acbc99ccc208b630ca278d604696c6e88f3 100644 (file)
--- a/src/cmd/compile/internal/ssa/regalloc.go
+++ b/src/cmd/compile/internal/ssa/regalloc.go
@@ -119,10 +119,12 @@ import (
         "cmd/compile/internal/types"
         "cmd/internal/src"
         "cmd/internal/sys"
+       "cmp"
         "fmt"
         "internal/buildcfg"
         "math"
         "math/bits"
+       "slices"
         "unsafe"
  )
  
@@ -1013,9 +1015,11 @@ func (s *regAllocState) regalloc(f *Func) {
                 // Initialize regValLiveSet and uses fields for this block.
                 // Walk backwards through the block doing liveness analysis.
                 regValLiveSet.clear()
-               for _, e := range s.live[b.ID] {
-                       s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
-                       regValLiveSet.add(e.ID)
+               if s.live != nil {
+                       for _, e := range s.live[b.ID] {
+                               s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
+                               regValLiveSet.add(e.ID)
+                       }
                 }
                 for _, v := range b.ControlValues() {
                         if s.values[v.ID].needReg {
@@ -1335,7 +1339,9 @@ func (s *regAllocState) regalloc(f *Func) {
                 }
  
                 // Load static desired register info at the end of the block.
-               desired.copy(&s.desired[b.ID])
+               if s.desired != nil {
+                       desired.copy(&s.desired[b.ID])
+               }
  
                 // Check actual assigned registers at the start of the next block(s).
                 // Dynamically assigned registers will trump the static
@@ -1377,7 +1383,7 @@ func (s *regAllocState) regalloc(f *Func) {
                         }
                 }
                 // Walk values backwards computing desired register info.
-               // See computeLive for more comments.
+               // See computeDesired for more comments.
                 for i := len(oldSched) - 1; i >= 0; i-- {
                         v := oldSched[i]
                         prefs := desired.remove(v.ID)
@@ -2044,8 +2050,10 @@ func (s *regAllocState) regalloc(f *Func) {
  
                 if checkEnabled {
                         regValLiveSet.clear()
-                       for _, x := range s.live[b.ID] {
-                               regValLiveSet.add(x.ID)
+                       if s.live != nil {
+                               for _, x := range s.live[b.ID] {
+                                       regValLiveSet.add(x.ID)
+                               }
                         }
                         for r := register(0); r < s.numRegs; r++ {
                                 v := s.regs[r].v
@@ -2062,37 +2070,39 @@ func (s *regAllocState) regalloc(f *Func) {
                 // isn't in a register, generate a use for the spill location.
                 // We need to remember this information so that
                 // the liveness analysis in stackalloc is correct.
-               for _, e := range s.live[b.ID] {
-                       vi := &s.values[e.ID]
-                       if vi.regs != 0 {
-                               // in a register, we'll use that source for the merge.
-                               continue
-                       }
-                       if vi.rematerializeable {
-                               // we'll rematerialize during the merge.
-                               continue
-                       }
-                       if s.f.pass.debug > regDebug {
-                               fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
+               if s.live != nil {
+                       for _, e := range s.live[b.ID] {
+                               vi := &s.values[e.ID]
+                               if vi.regs != 0 {
+                                       // in a register, we'll use that source for the merge.
+                                       continue
+                               }
+                               if vi.rematerializeable {
+                                       // we'll rematerialize during the merge.
+                                       continue
+                               }
+                               if s.f.pass.debug > regDebug {
+                                       fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
+                               }
+                               spill := s.makeSpill(s.orig[e.ID], b)
+                               s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
                         }
-                       spill := s.makeSpill(s.orig[e.ID], b)
-                       s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
-               }
  
-               // Clear any final uses.
-               // All that is left should be the pseudo-uses added for values which
-               // are live at the end of b.
-               for _, e := range s.live[b.ID] {
-                       u := s.values[e.ID].uses
-                       if u == nil {
-                               f.Fatalf("live at end, no uses v%d", e.ID)
-                       }
-                       if u.next != nil {
-                               f.Fatalf("live at end, too many uses v%d", e.ID)
+                       // Clear any final uses.
+                       // All that is left should be the pseudo-uses added for values which
+                       // are live at the end of b.
+                       for _, e := range s.live[b.ID] {
+                               u := s.values[e.ID].uses
+                               if u == nil {
+                                       f.Fatalf("live at end, no uses v%d", e.ID)
+                               }
+                               if u.next != nil {
+                                       f.Fatalf("live at end, too many uses v%d", e.ID)
+                               }
+                               s.values[e.ID].uses = nil
+                               u.next = s.freeUseRecords
+                               s.freeUseRecords = u
                         }
-                       s.values[e.ID].uses = nil
-                       u.next = s.freeUseRecords
-                       s.freeUseRecords = u
                 }
  
                 // allocReg may have dropped registers from startRegsMask that
@@ -2192,8 +2202,8 @@ func (s *regAllocState) placeSpills() {
                 best := v.Block
                 bestArg := v
                 var bestDepth int16
-               if l := s.loopnest.b2l[best.ID]; l != nil {
-                       bestDepth = l.depth
+               if s.loopnest != nil && s.loopnest.b2l[best.ID] != nil {
+                       bestDepth = s.loopnest.b2l[best.ID].depth
                 }
                 b := best
                 const maxSpillSearch = 100
@@ -2215,8 +2225,8 @@ func (s *regAllocState) placeSpills() {
                         }
  
                         var depth int16
-                       if l := s.loopnest.b2l[b.ID]; l != nil {
-                               depth = l.depth
+                       if s.loopnest != nil && s.loopnest.b2l[b.ID] != nil {
+                               depth = s.loopnest.b2l[b.ID].depth
                         }
                         if depth > bestDepth {
                                 // Don't push the spill into a deeper loop.
@@ -2796,16 +2806,18 @@ type liveInfo struct {
  // computeLive computes a map from block ID to a list of value IDs live at the end
  // of that block. Together with the value ID is a count of how many instructions
  // to the next use of that value. The resulting map is stored in s.live.
-// computeLive also computes the desired register information at the end of each block.
-// This desired register information is stored in s.desired.
-// TODO: this could be quadratic if lots of variables are live across lots of
-// basic blocks. Figure out a way to make this function (or, more precisely, the user
-// of this function) require only linear size & time.
  func (s *regAllocState) computeLive() {
         f := s.f
+       // single block functions do not have variables that are live across
+       // branches
+       if len(f.Blocks) == 1 {
+               return
+       }
+       po := f.postorder()
         s.live = make([][]liveInfo, f.NumBlocks())
         s.desired = make([]desiredState, f.NumBlocks())
-       var phis []*Value
+       s.loopnest = f.loopnest()
+
         rematIDs := make([]ID, 0, 64)
  
         live := f.newSparseMapPos(f.NumValues())
@@ -2813,31 +2825,63 @@ func (s *regAllocState) computeLive() {
         t := f.newSparseMapPos(f.NumValues())
         defer f.retSparseMapPos(t)
  
-       // Keep track of which value we want in each register.
-       var desired desiredState
-
-       // Instead of iterating over f.Blocks, iterate over their postordering.
-       // Liveness information flows backward, so starting at the end
-       // increases the probability that we will stabilize quickly.
-       // TODO: Do a better job yet. Here's one possibility:
-       // Calculate the dominator tree and locate all strongly connected components.
-       // If a value is live in one block of an SCC, it is live in all.
-       // Walk the dominator tree from end to beginning, just once, treating SCC
-       // components as single blocks, duplicated calculated liveness information
-       // out to all of them.
-       po := f.postorder()
-       s.loopnest = f.loopnest()
         s.loopnest.computeUnavoidableCalls()
+
+       // Liveness analysis.
+       // This is an adapted version of the algorithm described in chapter 2.4.2
+       // of Fabrice Rastello's On Sparse Intermediate Representations.
+       //   https://web.archive.org/web/20240417212122if_/https://inria.hal.science/hal-00761555/file/habilitation.pdf#section.50
+       //
+       // For our implementation, we fall back to a traditional iterative algorithm when we encounter
+       // Irreducible CFGs. They are very uncommon in Go code because they need to be constructed with
+       // gotos and our current loopnest definition does not compute all the information that
+       // we'd need to compute the loop ancestors for that step of the algorithm.
+       //
+       // Additionally, instead of only considering non-loop successors in the initial DFS phase,
+       // we compute the liveout as the union of all successors. This larger liveout set is a subset
+       // of the final liveout for the block and adding this information in the DFS phase means that
+       // we get slightly more accurate distance information.
+       var loopLiveIn map[*loop][]liveInfo
+       var numCalls []int32
+       if len(s.loopnest.loops) > 0 && !s.loopnest.hasIrreducible {
+               loopLiveIn = make(map[*loop][]liveInfo)
+               numCalls = f.Cache.allocInt32Slice(f.NumBlocks())
+               defer f.Cache.freeInt32Slice(numCalls)
+       }
+
         for {
                 changed := false
  
                 for _, b := range po {
                         // Start with known live values at the end of the block.
-                       // Add len(b.Values) to adjust from end-of-block distance
-                       // to beginning-of-block distance.
                         live.clear()
                         for _, e := range s.live[b.ID] {
-                               live.set(e.ID, e.dist+int32(len(b.Values)), e.pos)
+                               live.set(e.ID, e.dist, e.pos)
+                       }
+                       update := false
+                       // arguments to phi nodes are live at this blocks out
+                       for _, e := range b.Succs {
+                               succ := e.b
+                               delta := branchDistance(b, succ)
+                               for _, v := range succ.Values {
+                                       if v.Op != OpPhi {
+                                               break
+                                       }
+                                       arg := v.Args[e.i]
+                                       if s.values[arg.ID].needReg && (!live.contains(arg.ID) || delta < live.get(arg.ID)) {
+                                               live.set(arg.ID, delta, v.Pos)
+                                               update = true
+                                       }
+                               }
+                       }
+                       if update {
+                               s.live[b.ID] = updateLive(live, s.live[b.ID])
+                       }
+                       // Add len(b.Values) to adjust from end-of-block distance
+                       // to beginning-of-block distance.
+                       c := live.contents()
+                       for i := range c {
+                               c[i].val += int32(len(b.Values))
                         }
  
                         // Mark control values as live
@@ -2847,18 +2891,16 @@ func (s *regAllocState) computeLive() {
                                 }
                         }
  
-                       // Propagate backwards to the start of the block
-                       // Assumes Values have been scheduled.
-                       phis = phis[:0]
                         for i := len(b.Values) - 1; i >= 0; i-- {
                                 v := b.Values[i]
                                 live.remove(v.ID)
                                 if v.Op == OpPhi {
-                                       // save phi ops for later
-                                       phis = append(phis, v)
                                         continue
                                 }
                                 if opcodeTable[v.Op].call {
+                                       if numCalls != nil {
+                                               numCalls[b.ID]++
+                                       }
                                         rematIDs = rematIDs[:0]
                                         c := live.contents()
                                         for i := range c {
@@ -2881,7 +2923,207 @@ func (s *regAllocState) computeLive() {
                                         }
                                 }
                         }
-                       // Propagate desired registers backwards.
+                       // This is a loop header, save our live-in so that
+                       // we can use it to fill in the loop bodies later
+                       if loopLiveIn != nil {
+                               loop := s.loopnest.b2l[b.ID]
+                               if loop != nil && loop.header.ID == b.ID {
+                                       loopLiveIn[loop] = updateLive(live, nil)
+                               }
+                       }
+                       // For each predecessor of b, expand its list of live-at-end values.
+                       // invariant: live contains the values live at the start of b
+                       for _, e := range b.Preds {
+                               p := e.b
+                               delta := branchDistance(p, b)
+
+                               // Start t off with the previously known live values at the end of p.
+                               t.clear()
+                               for _, e := range s.live[p.ID] {
+                                       t.set(e.ID, e.dist, e.pos)
+                               }
+                               update := false
+
+                               // Add new live values from scanning this block.
+                               for _, e := range live.contents() {
+                                       d := e.val + delta
+                                       if !t.contains(e.key) || d < t.get(e.key) {
+                                               update = true
+                                               t.set(e.key, d, e.pos)
+                                       }
+                               }
+
+                               if !update {
+                                       continue
+                               }
+                               s.live[p.ID] = updateLive(t, s.live[p.ID])
+                               changed = true
+                       }
+               }
+
+               // Doing a traditional iterative algorithm and have run
+               // out of changes
+               if !changed {
+                       break
+               }
+
+               // Doing a pre-pass and will fill in the liveness information
+               // later
+               if loopLiveIn != nil {
+                       break
+               }
+               // For loopless code, we have full liveness info after a single
+               // iteration
+               if len(s.loopnest.loops) == 0 {
+                       break
+               }
+       }
+       if f.pass.debug > regDebug {
+               s.debugPrintLive("after dfs walk", f, s.live, s.desired)
+       }
+
+       // irreducible CFGs and functions without loops are already
+       // done, compute their desired registers and return
+       if loopLiveIn == nil {
+               s.computeDesired()
+               return
+       }
+
+       // Walk the loopnest from outer to inner, adding
+       // all live-in values from their parent. Instead of
+       // a recursive algorithm, iterate in depth order.
+       // TODO(dmo): can we permute the loopnest? can we avoid this copy?
+       loops := slices.Clone(s.loopnest.loops)
+       slices.SortFunc(loops, func(a, b *loop) int {
+               return cmp.Compare(a.depth, b.depth)
+       })
+
+       loopset := f.newSparseMapPos(f.NumValues())
+       defer f.retSparseMapPos(loopset)
+       for _, loop := range loops {
+               if loop.outer == nil {
+                       continue
+               }
+               livein := loopLiveIn[loop]
+               loopset.clear()
+               for _, l := range livein {
+                       loopset.set(l.ID, l.dist, l.pos)
+               }
+               update := false
+               for _, l := range loopLiveIn[loop.outer] {
+                       if !loopset.contains(l.ID) {
+                               loopset.set(l.ID, l.dist, l.pos)
+                               update = true
+                       }
+               }
+               if update {
+                       loopLiveIn[loop] = updateLive(loopset, livein)
+               }
+       }
+       // unknownDistance is a sentinel value for when we know a variable
+       // is live at any given block, but we do not yet know how far until it's next
+       // use. The distance will be computed later.
+       const unknownDistance = -1
+
+       // add live-in values of the loop headers to their children.
+       // This includes the loop headers themselves, since they can have values
+       // that die in the middle of the block and aren't live-out
+       for _, b := range po {
+               loop := s.loopnest.b2l[b.ID]
+               if loop == nil {
+                       continue
+               }
+               headerLive := loopLiveIn[loop]
+               loopset.clear()
+               for _, l := range s.live[b.ID] {
+                       loopset.set(l.ID, l.dist, l.pos)
+               }
+               update := false
+               for _, l := range headerLive {
+                       if !loopset.contains(l.ID) {
+                               loopset.set(l.ID, unknownDistance, src.NoXPos)
+                               update = true
+                       }
+               }
+               if update {
+                       s.live[b.ID] = updateLive(loopset, s.live[b.ID])
+               }
+       }
+       if f.pass.debug > regDebug {
+               s.debugPrintLive("after live loop prop", f, s.live, s.desired)
+       }
+       // Filling in liveness from loops leaves some blocks with no distance information
+       // Run over them and fill in the information from their successors.
+       // To stabilize faster, we quit when no block has missing values and we only
+       // look at blocks that still have missing values in subsequent iterations
+       unfinishedBlocks := f.Cache.allocBlockSlice(len(po))
+       defer f.Cache.freeBlockSlice(unfinishedBlocks)
+       copy(unfinishedBlocks, po)
+
+       for len(unfinishedBlocks) > 0 {
+               n := 0
+               for _, b := range unfinishedBlocks {
+                       live.clear()
+                       unfinishedValues := 0
+                       for _, l := range s.live[b.ID] {
+                               if l.dist == unknownDistance {
+                                       unfinishedValues++
+                               }
+                               live.set(l.ID, l.dist, l.pos)
+                       }
+                       update := false
+                       for _, e := range b.Succs {
+                               succ := e.b
+                               for _, l := range s.live[succ.ID] {
+                                       if !live.contains(l.ID) || l.dist == unknownDistance {
+                                               continue
+                                       }
+                                       dist := int32(len(succ.Values)) + l.dist + branchDistance(b, succ)
+                                       dist += numCalls[succ.ID] * unlikelyDistance
+                                       val := live.get(l.ID)
+                                       switch {
+                                       case val == unknownDistance:
+                                               unfinishedValues--
+                                               fallthrough
+                                       case dist < val:
+                                               update = true
+                                               live.set(l.ID, dist, l.pos)
+                                       }
+                               }
+                       }
+                       if update {
+                               s.live[b.ID] = updateLive(live, s.live[b.ID])
+                       }
+                       if unfinishedValues > 0 {
+                               unfinishedBlocks[n] = b
+                               n++
+                       }
+               }
+               unfinishedBlocks = unfinishedBlocks[:n]
+       }
+
+       s.computeDesired()
+
+       if f.pass.debug > regDebug {
+               s.debugPrintLive("final", f, s.live, s.desired)
+       }
+}
+
+// computeDesired computes the desired register information at the end of each block.
+// It is essentially a liveness analysis on machine registers instead of SSA values
+// The desired register information is stored in s.desired.
+func (s *regAllocState) computeDesired() {
+
+       // TODO: Can we speed this up using the liveness information we have already
+       // from computeLive?
+       // TODO: Since we don't propagate information through phi nodes, can we do
+       // this as a single dominator tree walk instead of the iterative solution?
+       var desired desiredState
+       f := s.f
+       po := f.postorder()
+       for {
+               changed := false
+               for _, b := range po {
                         desired.copy(&s.desired[b.ID])
                         for i := len(b.Values) - 1; i >= 0; i-- {
                                 v := b.Values[i]
@@ -2916,106 +3158,85 @@ func (s *regAllocState) computeLive() {
                                         desired.addList(v.Args[0].ID, prefs)
                                 }
                         }
-
-                       // For each predecessor of b, expand its list of live-at-end values.
-                       // invariant: live contains the values live at the start of b (excluding phi inputs)
-                       for i, e := range b.Preds {
+                       for _, e := range b.Preds {
                                 p := e.b
-                               // Compute additional distance for the edge.
-                               // Note: delta must be at least 1 to distinguish the control
-                               // value use from the first user in a successor block.
-                               delta := int32(normalDistance)
-                               if len(p.Succs) == 2 {
-                                       if p.Succs[0].b == b && p.Likely == BranchLikely ||
-                                               p.Succs[1].b == b && p.Likely == BranchUnlikely {
-                                               delta = likelyDistance
-                                       }
-                                       if p.Succs[0].b == b && p.Likely == BranchUnlikely ||
-                                               p.Succs[1].b == b && p.Likely == BranchLikely {
-                                               delta = unlikelyDistance
-                                       }
-                               }
+                               changed = s.desired[p.ID].merge(&desired) || changed
+                       }
+               }
+               if !changed || (!s.loopnest.hasIrreducible && len(s.loopnest.loops) == 0) {
+                       break
+               }
+       }
+}
  
-                               // Update any desired registers at the end of p.
-                               s.desired[p.ID].merge(&desired)
+// updateLive updates a given liveInfo slice with the contents of t
+func updateLive(t *sparseMapPos, live []liveInfo) []liveInfo {
+       live = live[:0]
+       if cap(live) < t.size() {
+               live = make([]liveInfo, 0, t.size())
+       }
+       for _, e := range t.contents() {
+               live = append(live, liveInfo{e.key, e.val, e.pos})
+       }
+       return live
+}
  
-                               // Start t off with the previously known live values at the end of p.
-                               t.clear()
-                               for _, e := range s.live[p.ID] {
-                                       t.set(e.ID, e.dist, e.pos)
-                               }
-                               update := false
+// branchDistance calculates the distance between a block and a
+// successor in pseudo-instructions. This is used to indicate
+// likeliness
+func branchDistance(b *Block, s *Block) int32 {
+       if len(b.Succs) == 2 {
+               if b.Succs[0].b == s && b.Likely == BranchLikely ||
+                       b.Succs[1].b == s && b.Likely == BranchUnlikely {
+                       return likelyDistance
+               }
+               if b.Succs[0].b == s && b.Likely == BranchUnlikely ||
+                       b.Succs[1].b == s && b.Likely == BranchLikely {
+                       return unlikelyDistance
+               }
+       }
+       // Note: the branch distance must be at least 1 to distinguish the control
+       // value use from the first user in a successor block.
+       return normalDistance
+}
  
-                               // Add new live values from scanning this block.
-                               for _, e := range live.contents() {
-                                       d := e.val + delta
-                                       if !t.contains(e.key) || d < t.get(e.key) {
-                                               update = true
-                                               t.set(e.key, d, e.pos)
-                                       }
-                               }
-                               // Also add the correct arg from the saved phi values.
-                               // All phis are at distance delta (we consider them
-                               // simultaneously happening at the start of the block).
-                               for _, v := range phis {
-                                       id := v.Args[i].ID
-                                       if s.values[id].needReg && (!t.contains(id) || delta < t.get(id)) {
-                                               update = true
-                                               t.set(id, delta, v.Pos)
-                                       }
-                               }
+func (s *regAllocState) debugPrintLive(stage string, f *Func, live [][]liveInfo, desired []desiredState) {
+       fmt.Printf("%s: live values at end of each block: %s\n", stage, f.Name)
+       for _, b := range f.Blocks {
+               s.debugPrintLiveBlock(b, live[b.ID], &desired[b.ID])
+       }
+}
  
-                               if !update {
+func (s *regAllocState) debugPrintLiveBlock(b *Block, live []liveInfo, desired *desiredState) {
+       fmt.Printf("  %s:", b)
+       slices.SortFunc(live, func(a, b liveInfo) int {
+               return cmp.Compare(a.ID, b.ID)
+       })
+       for _, x := range live {
+               fmt.Printf(" v%d(%d)", x.ID, x.dist)
+               for _, e := range desired.entries {
+                       if e.ID != x.ID {
+                               continue
+                       }
+                       fmt.Printf("[")
+                       first := true
+                       for _, r := range e.regs {
+                               if r == noRegister {
                                         continue
                                 }
-                               // The live set has changed, update it.
-                               l := s.live[p.ID][:0]
-                               if cap(l) < t.size() {
-                                       l = make([]liveInfo, 0, t.size())
+                               if !first {
+                                       fmt.Printf(",")
                                 }
-                               for _, e := range t.contents() {
-                                       l = append(l, liveInfo{e.key, e.val, e.pos})
-                               }
-                               s.live[p.ID] = l
-                               changed = true
+                               fmt.Print(&s.registers[r])
+                               first = false
                         }
-               }
-
-               if !changed {
-                       break
+                       fmt.Printf("]")
                 }
         }
-       if f.pass.debug > regDebug {
-               fmt.Println("live values at end of each block")
-               for _, b := range f.Blocks {
-                       fmt.Printf("  %s:", b)
-                       for _, x := range s.live[b.ID] {
-                               fmt.Printf(" v%d(%d)", x.ID, x.dist)
-                               for _, e := range s.desired[b.ID].entries {
-                                       if e.ID != x.ID {
-                                               continue
-                                       }
-                                       fmt.Printf("[")
-                                       first := true
-                                       for _, r := range e.regs {
-                                               if r == noRegister {
-                                                       continue
-                                               }
-                                               if !first {
-                                                       fmt.Printf(",")
-                                               }
-                                               fmt.Print(&s.registers[r])
-                                               first = false
-                                       }
-                                       fmt.Printf("]")
-                               }
-                       }
-                       if avoid := s.desired[b.ID].avoid; avoid != 0 {
-                               fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
-                       }
-                       fmt.Println()
-               }
+       if avoid := desired.avoid; avoid != 0 {
+               fmt.Printf(" avoid=%v", s.RegMaskString(avoid))
         }
+       fmt.Println()
  }
  
  // A desiredState represents desired register assignments.
@@ -3131,14 +3352,17 @@ func (d *desiredState) remove(vid ID) [4]register {
         return [4]register{noRegister, noRegister, noRegister, noRegister}
  }
  
-// merge merges another desired state x into d.
-func (d *desiredState) merge(x *desiredState) {
+// merge merges another desired state x into d. Returns whether the set has
+// changed
+func (d *desiredState) merge(x *desiredState) bool {
+       oldAvoid := d.avoid
         d.avoid |= x.avoid
         // There should only be a few desired registers, so
         // linear insert is ok.
         for _, e := range x.entries {
                 d.addList(e.ID, e.regs)
         }
+       return oldAvoid != d.avoid
  }
  
  // computeUnavoidableCalls computes the containsUnavoidableCall fields in the loop nest.
author	Daniel Morsing <daniel.morsing@gmail.com>
	Sat, 9 Aug 2025 19:30:30 +0000 (20:30 +0100)
committer	Gopher Robot <gobot@golang.org>
	Tue, 4 Nov 2025 21:57:19 +0000 (13:57 -0800)