From 5045fdd8ff8769c34e837fbfb0894a72dfa73b4d Mon Sep 17 00:00:00 2001
From: Keith Randall <khr@golang.org>
Date: Tue, 10 Jun 2025 14:37:47 -0700
Subject: [PATCH] cmd/compile: fix containsUnavoidableCall computation

The previous algorithm was incorrect, as it reused the dominatedByCall
slice without resetting it. It also used the depth fields even though
they were not yet calculated.

Also, clean up a lot of the loop detector code that we never use.

Always compute depths. It is cheap.

Update #71868

Not really sure how to test this. As it is just an advisory bit,
nothing goes really wrong when the result is incorrect.

Change-Id: Ic0ae87a4d3576554831252d88b05b058ca68af41
Reviewed-on: https://go-review.googlesource.com/c/go/+/680775
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: David Chase <drchase@google.com>
---
 src/cmd/compile/internal/ssa/likelyadjust.go | 208 +++----------------
 src/cmd/compile/internal/ssa/regalloc.go     |  71 ++++++-
 src/cmd/compile/internal/ssa/rewrite.go      |   1 -
 src/cmd/compile/internal/ssa/tighten.go      |   1 -
 4 files changed, 103 insertions(+), 178 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/likelyadjust.go b/src/cmd/compile/internal/ssa/likelyadjust.go
index 1dfb53d355..56238ded1e 100644
--- a/src/cmd/compile/internal/ssa/likelyadjust.go
+++ b/src/cmd/compile/internal/ssa/likelyadjust.go
@@ -12,18 +12,15 @@ type loop struct {
 	header *Block // The header node of this (reducible) loop
 	outer  *loop  // loop containing this loop
 
-	// By default, children, exits, and depth are not initialized.
-	children []*loop  // loops nested directly within this loop. Initialized by assembleChildren().
-	exits    []*Block // exits records blocks reached by exits from this loop. Initialized by findExits().
-
 	// Next three fields used by regalloc and/or
 	// aid in computation of inner-ness and list of blocks.
 	nBlocks int32 // Number of blocks in this loop but not within inner loops
 	depth   int16 // Nesting depth of the loop; 1 is outermost. Initialized by calculateDepths().
 	isInner bool  // True if never discovered to contain a loop
 
-	// register allocation uses this.
-	containsUnavoidableCall bool // True if all paths through the loop have a call
+	// True if all paths through the loop have a call.
+	// Computed and used by regalloc; stored here for convenience.
+	containsUnavoidableCall bool
 }
 
 // outerinner records that outer contains inner
@@ -49,18 +46,6 @@ func (sdom SparseTree) outerinner(outer, inner *loop) {
 	outer.isInner = false
 }
 
-func checkContainsCall(bb *Block) bool {
-	if bb.Kind == BlockDefer {
-		return true
-	}
-	for _, v := range bb.Values {
-		if opcodeTable[v.Op].call {
-			return true
-		}
-	}
-	return false
-}
-
 type loopnest struct {
 	f              *Func
 	b2l            []*loop
@@ -68,9 +53,6 @@ type loopnest struct {
 	sdom           SparseTree
 	loops          []*loop
 	hasIrreducible bool // TODO current treatment of irreducible loops is very flaky, if accurate loops are needed, must punt at function level.
-
-	// Record which of the lazily initialized fields have actually been initialized.
-	initializedChildren, initializedDepth, initializedExits bool
 }
 
 const (
@@ -355,91 +337,59 @@ func loopnestfor(f *Func) *loopnest {
 		visited[b.ID] = true
 	}
 
-	ln := &loopnest{f: f, b2l: b2l, po: po, sdom: sdom, loops: loops, hasIrreducible: sawIrred}
-
-	// Calculate containsUnavoidableCall for regalloc
-	dominatedByCall := f.Cache.allocBoolSlice(f.NumBlocks())
-	defer f.Cache.freeBoolSlice(dominatedByCall)
-	for _, b := range po {
-		if checkContainsCall(b) {
-			dominatedByCall[b.ID] = true
-		}
-	}
-	// Run dfs to find path through the loop that avoids all calls.
-	// Such path either escapes loop or return back to header.
-	// It isn't enough to have exit not dominated by any call, for example:
-	// ... some loop
-	// call1   call2
-	//   \      /
-	//     exit
-	// ...
-	// exit is not dominated by any call, but we don't have call-free path to it.
+	// Compute depths.
 	for _, l := range loops {
-		// Header contains call.
-		if dominatedByCall[l.header.ID] {
-			l.containsUnavoidableCall = true
+		if l.depth != 0 {
+			// Already computed because it is an ancestor of
+			// a previous loop.
 			continue
 		}
-		callfreepath := false
-		tovisit := make([]*Block, 0, len(l.header.Succs))
-		// Push all non-loop non-exit successors of header onto toVisit.
-		for _, s := range l.header.Succs {
-			nb := s.Block()
-			// This corresponds to loop with zero iterations.
-			if !l.iterationEnd(nb, b2l) {
-				tovisit = append(tovisit, nb)
+		// Find depth by walking up the loop tree.
+		d := int16(0)
+		for x := l; x != nil; x = x.outer {
+			if x.depth != 0 {
+				d += x.depth
+				break
 			}
+			d++
 		}
-		for len(tovisit) > 0 {
-			cur := tovisit[len(tovisit)-1]
-			tovisit = tovisit[:len(tovisit)-1]
-			if dominatedByCall[cur.ID] {
-				continue
-			}
-			// Record visited in dominatedByCall.
-			dominatedByCall[cur.ID] = true
-			for _, s := range cur.Succs {
-				nb := s.Block()
-				if l.iterationEnd(nb, b2l) {
-					callfreepath = true
-				}
-				if !dominatedByCall[nb.ID] {
-					tovisit = append(tovisit, nb)
-				}
-
-			}
-			if callfreepath {
+		// Set depth for every ancestor.
+		for x := l; x != nil; x = x.outer {
+			if x.depth != 0 {
 				break
 			}
+			x.depth = d
+			d--
+		}
+	}
+	// Double-check depths.
+	for _, l := range loops {
+		want := int16(1)
+		if l.outer != nil {
+			want = l.outer.depth + 1
 		}
-		if !callfreepath {
-			l.containsUnavoidableCall = true
+		if l.depth != want {
+			l.header.Fatalf("bad depth calculation for loop %s: got %d want %d", l.header, l.depth, want)
 		}
 	}
 
+	ln := &loopnest{f: f, b2l: b2l, po: po, sdom: sdom, loops: loops, hasIrreducible: sawIrred}
+
 	// Curious about the loopiness? "-d=ssa/likelyadjust/stats"
 	if f.pass != nil && f.pass.stats > 0 && len(loops) > 0 {
-		ln.assembleChildren()
-		ln.calculateDepths()
-		ln.findExits()
 
 		// Note stats for non-innermost loops are slightly flawed because
 		// they don't account for inner loop exits that span multiple levels.
 
 		for _, l := range loops {
-			x := len(l.exits)
-			cf := 0
-			if !l.containsUnavoidableCall {
-				cf = 1
-			}
 			inner := 0
 			if l.isInner {
 				inner++
 			}
 
-			f.LogStat("loopstats:",
-				l.depth, "depth", x, "exits",
-				inner, "is_inner", cf, "always_calls", l.nBlocks, "n_blocks")
+			f.LogStat("loopstats in "+f.Name+":",
+				l.depth, "depth",
+				inner, "is_inner", l.nBlocks, "n_blocks")
 		}
 	}
 
@@ -465,62 +415,6 @@ func loopnestfor(f *Func) *loopnest {
 	return ln
 }
 
-// assembleChildren initializes the children field of each
-// loop in the nest.  Loop A is a child of loop B if A is
-// directly nested within B (based on the reducible-loops
-// detection above)
-func (ln *loopnest) assembleChildren() {
-	if ln.initializedChildren {
-		return
-	}
-	for _, l := range ln.loops {
-		if l.outer != nil {
-			l.outer.children = append(l.outer.children, l)
-		}
-	}
-	ln.initializedChildren = true
-}
-
-// calculateDepths uses the children field of loops
-// to determine the nesting depth (outer=1) of each
-// loop.  This is helpful for finding exit edges.
-func (ln *loopnest) calculateDepths() {
-	if ln.initializedDepth {
-		return
-	}
-	ln.assembleChildren()
-	for _, l := range ln.loops {
-		if l.outer == nil {
-			l.setDepth(1)
-		}
-	}
-	ln.initializedDepth = true
-}
-
-// findExits uses loop depth information to find the
-// exits from a loop.
-func (ln *loopnest) findExits() {
-	if ln.initializedExits {
-		return
-	}
-	ln.calculateDepths()
-	b2l := ln.b2l
-	for _, b := range ln.po {
-		l := b2l[b.ID]
-		if l != nil && len(b.Succs) == 2 {
-			sl := b2l[b.Succs[0].b.ID]
-			if recordIfExit(l, sl, b.Succs[0].b) {
-				continue
-			}
-			sl = b2l[b.Succs[1].b.ID]
-			if recordIfExit(l, sl, b.Succs[1].b) {
-				continue
-			}
-		}
-	}
-	ln.initializedExits = true
-}
-
 // depth returns the loop nesting level of block b.
 func (ln *loopnest) depth(b ID) int16 {
 	if l := ln.b2l[b]; l != nil {
@@ -528,39 +422,3 @@ func (ln *loopnest) depth(b ID) int16 {
 	}
 	return 0
 }
-
-// recordIfExit checks sl (the loop containing b) to see if it
-// is outside of loop l, and if so, records b as an exit block
-// from l and returns true.
-func recordIfExit(l, sl *loop, b *Block) bool {
-	if sl != l {
-		if sl == nil || sl.depth <= l.depth {
-			l.exits = append(l.exits, b)
-			return true
-		}
-		// sl is not nil, and is deeper than l
-		// it's possible for this to be a goto into an irreducible loop made from gotos.
-		for sl.depth > l.depth {
-			sl = sl.outer
-		}
-		if sl != l {
-			l.exits = append(l.exits, b)
-			return true
-		}
-	}
-	return false
-}
-
-func (l *loop) setDepth(d int16) {
-	l.depth = d
-	for _, c := range l.children {
-		c.setDepth(d + 1)
-	}
-}
-
-// iterationEnd checks if block b ends iteration of loop l.
-// Ending iteration means either escaping to outer loop/code or
-// going back to header
-func (l *loop) iterationEnd(b *Block, b2l []*loop) bool {
-	return b == l.header || b2l[b.ID] == nil || (b2l[b.ID] != l && b2l[b.ID].depth <= l.depth)
-}
diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go
index f1e210fe9b..5be771571f 100644
--- a/src/cmd/compile/internal/ssa/regalloc.go
+++ b/src/cmd/compile/internal/ssa/regalloc.go
@@ -2735,7 +2735,7 @@ func (s *regAllocState) computeLive() {
 	// out to all of them.
 	po := f.postorder()
 	s.loopnest = f.loopnest()
-	s.loopnest.calculateDepths()
+	s.loopnest.computeUnavoidableCalls()
 	for {
 		changed := false
 
@@ -3042,3 +3042,72 @@ func (d *desiredState) merge(x *desiredState) {
 		d.addList(e.ID, e.regs)
 	}
 }
+
+// computeUnavoidableCalls computes the containsUnavoidableCall fields in the loop nest.
+func (loopnest *loopnest) computeUnavoidableCalls() {
+	f := loopnest.f
+
+	hasCall := f.Cache.allocBoolSlice(f.NumBlocks())
+	defer f.Cache.freeBoolSlice(hasCall)
+	for _, b := range f.Blocks {
+		if b.containsCall() {
+			hasCall[b.ID] = true
+		}
+	}
+	found := f.Cache.allocSparseSet(f.NumBlocks())
+	defer f.Cache.freeSparseSet(found)
+	// Run dfs to find path through the loop that avoids all calls.
+	// Such path either escapes the loop or returns back to the header.
+	// It isn't enough to have exit not dominated by any call, for example:
+	// ... some loop
+	// call1    call2
+	//   \       /
+	//     block
+	// ...
+	// block is not dominated by any single call, but we don't have call-free path to it.
+loopLoop:
+	for _, l := range loopnest.loops {
+		found.clear()
+		tovisit := make([]*Block, 0, 8)
+		tovisit = append(tovisit, l.header)
+		for len(tovisit) > 0 {
+			cur := tovisit[len(tovisit)-1]
+			tovisit = tovisit[:len(tovisit)-1]
+			if hasCall[cur.ID] {
+				continue
+			}
+			for _, s := range cur.Succs {
+				nb := s.Block()
+				if nb == l.header {
+					// Found a call-free path around the loop.
+					continue loopLoop
+				}
+				if found.contains(nb.ID) {
+					// Already found via another path.
+					continue
+				}
+				nl := loopnest.b2l[nb.ID]
+				if nl == nil || (nl.depth <= l.depth && nl != l) {
+					// Left the loop.
+					continue
+				}
+				tovisit = append(tovisit, nb)
+				found.add(nb.ID)
+			}
+		}
+		// No call-free path was found.
+		l.containsUnavoidableCall = true
+	}
+}
+
+func (b *Block) containsCall() bool {
+	if b.Kind == BlockDefer {
+		return true
+	}
+	for _, v := range b.Values {
+		if opcodeTable[v.Op].call {
+			return true
+		}
+	}
+	return false
+}
diff --git a/src/cmd/compile/internal/ssa/rewrite.go b/src/cmd/compile/internal/ssa/rewrite.go
index 9c31cd9977..9185d46499 100644
--- a/src/cmd/compile/internal/ssa/rewrite.go
+++ b/src/cmd/compile/internal/ssa/rewrite.go
@@ -303,7 +303,6 @@ func canMergeLoadClobber(target, load, x *Value) bool {
 		return false
 	}
 	loopnest := x.Block.Func.loopnest()
-	loopnest.calculateDepths()
 	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
 		return false
 	}
diff --git a/src/cmd/compile/internal/ssa/tighten.go b/src/cmd/compile/internal/ssa/tighten.go
index eb5007b26e..48efdb5609 100644
--- a/src/cmd/compile/internal/ssa/tighten.go
+++ b/src/cmd/compile/internal/ssa/tighten.go
@@ -82,7 +82,6 @@ func tighten(f *Func) {
 	// We use this to make sure we don't tighten a value into a (deeper) loop.
 	idom := f.Idom()
 	loops := f.loopnest()
-	loops.calculateDepths()
 
 	changed := true
 	for changed {
-- 
2.52.0