From: thepudds Date: Wed, 12 Mar 2025 21:29:25 +0000 (-0400) Subject: cmd/compile/internal/escape: improve order of work to speed up analyzing many locations X-Git-Tag: go1.25rc1~174 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=74304cda29381fd5ba07a4090b701f8a670896c6;p=gostls13.git cmd/compile/internal/escape: improve order of work to speed up analyzing many locations For the package github.com/microsoft/typescript-go/internal/checker, compilation currently spends most of its time in escape analysis. Here, we re-order work to be more efficient when analyzing many locations, and delay visiting some locations to prioritize locations that might be more likely to reach a terminal point of reaching the heap and possibly reduce the count of intermediate states for each location. Action graph reported build times show roughly a 5x improvement for compilation of the typescript-go/internal/checker package: go1.24.0: 91.792s cl-657179-ps1: 17.578s with timing via: go build -a -debug-actiongraph=/tmp/actiongraph-cl-657179-ps1 -v github.com/microsoft/typescript-go/internal/checker There are some additional adjustments to make here, including we can consider a follow-on CL I have that parallelizes the operations of the core loop, but this seems to be a nice win as is, and my understanding is the desire is to merge this as it stands. Updates #72815 Change-Id: I1753c5354b495b059f68fb97f3103ee7834f9eee Reviewed-on: https://go-review.googlesource.com/c/go/+/657179 Reviewed-by: Cherry Mui Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- diff --git a/src/cmd/compile/internal/escape/graph.go b/src/cmd/compile/internal/escape/graph.go index cd800bc4d6..0bbf6bb941 100644 --- a/src/cmd/compile/internal/escape/graph.go +++ b/src/cmd/compile/internal/escape/graph.go @@ -62,9 +62,14 @@ type location struct { dst *location dstEdgeIdx int - // queued is used by walkAll to track whether this location is - // in the walk queue. - queued bool + // queuedWalkAll is used by walkAll to track whether this location is + // in its work queue. + queuedWalkAll bool + + // queuedWalkOne is used by walkOne to track whether this location is + // in its work queue. The value is the walkgen when this location was + // last queued for walkOne, or 0 if it's not currently queued. + queuedWalkOne uint32 // attrs is a bitset of location attributes. attrs locAttr @@ -288,6 +293,7 @@ func (e *escape) newLoc(n ir.Node, persists bool) *location { } else if loc.isName(ir.PPARAMOUT) { loc.paramOut = true } + if persists { loc.attrs |= attrPersists } diff --git a/src/cmd/compile/internal/escape/solve.go b/src/cmd/compile/internal/escape/solve.go index 4b0db1884d..d2263a7039 100644 --- a/src/cmd/compile/internal/escape/solve.go +++ b/src/cmd/compile/internal/escape/solve.go @@ -10,6 +10,7 @@ import ( "cmd/compile/internal/logopt" "cmd/internal/src" "fmt" + "math/bits" "strings" ) @@ -24,28 +25,41 @@ func (b *batch) walkAll() { // !persists->persists and !escapes->escapes, which can each // happen at most once. So we take Θ(len(e.allLocs)) walks. - // LIFO queue, has enough room for e.allLocs and e.heapLoc. - todo := make([]*location, 0, len(b.allLocs)+1) + // Queue of locations to walk. Has enough room for b.allLocs + // plus b.heapLoc, b.mutatorLoc, b.calleeLoc. + todo := newQueue(len(b.allLocs) + 3) + enqueue := func(loc *location) { - if !loc.queued { - todo = append(todo, loc) - loc.queued = true + if !loc.queuedWalkAll { + loc.queuedWalkAll = true + if loc.hasAttr(attrEscapes) { + // Favor locations that escape to the heap, + // which in some cases allows attrEscape to + // propagate faster. + todo.pushFront(loc) + } else { + todo.pushBack(loc) + } } } for _, loc := range b.allLocs { - enqueue(loc) + todo.pushFront(loc) + // TODO(thepudds): clean up setting queuedWalkAll. + loc.queuedWalkAll = true } - enqueue(&b.mutatorLoc) - enqueue(&b.calleeLoc) - enqueue(&b.heapLoc) + todo.pushFront(&b.mutatorLoc) + todo.pushFront(&b.calleeLoc) + todo.pushFront(&b.heapLoc) - var walkgen uint32 - for len(todo) > 0 { - root := todo[len(todo)-1] - todo = todo[:len(todo)-1] - root.queued = false + b.mutatorLoc.queuedWalkAll = true + b.calleeLoc.queuedWalkAll = true + b.heapLoc.queuedWalkAll = true + var walkgen uint32 + for todo.len() > 0 { + root := todo.popFront() + root.queuedWalkAll = false walkgen++ b.walkOne(root, walkgen, enqueue) } @@ -77,10 +91,12 @@ func (b *batch) walkOne(root *location, walkgen uint32, enqueue func(*location)) } } - todo := []*location{root} // LIFO queue - for len(todo) > 0 { - l := todo[len(todo)-1] - todo = todo[:len(todo)-1] + todo := newQueue(1) + todo.pushFront(root) + + for todo.len() > 0 { + l := todo.popFront() + l.queuedWalkOne = 0 // no longer queued for walkOne derefs := l.derefs var newAttrs locAttr @@ -167,7 +183,14 @@ func (b *batch) walkOne(root *location, walkgen uint32, enqueue func(*location)) edge.src.derefs = d edge.src.dst = l edge.src.dstEdgeIdx = i - todo = append(todo, edge.src) + // Check if already queued in todo. + if edge.src.queuedWalkOne != walkgen { + edge.src.queuedWalkOne = walkgen // Mark queued for this walkgen. + + // Place at the back to possibly give time for + // other possible attribute changes to src. + todo.pushBack(edge.src) + } } } } @@ -310,3 +333,65 @@ func (b *batch) outlives(l, other *location) bool { return false } + +// queue implements a queue of locations for use in WalkAll and WalkOne. +// It supports pushing to front & back, and popping from front. +// TODO(thepudds): does cmd/compile have a deque or similar somewhere? +type queue struct { + locs []*location + head int // index of front element + tail int // next back element + elems int +} + +func newQueue(capacity int) *queue { + capacity = max(capacity, 2) + capacity = 1 << bits.Len64(uint64(capacity-1)) // round up to a power of 2 + return &queue{locs: make([]*location, capacity)} +} + +// pushFront adds an element to the front of the queue. +func (q *queue) pushFront(loc *location) { + if q.elems == len(q.locs) { + q.grow() + } + q.head = q.wrap(q.head - 1) + q.locs[q.head] = loc + q.elems++ +} + +// pushBack adds an element to the back of the queue. +func (q *queue) pushBack(loc *location) { + if q.elems == len(q.locs) { + q.grow() + } + q.locs[q.tail] = loc + q.tail = q.wrap(q.tail + 1) + q.elems++ +} + +// popFront removes the front of the queue. +func (q *queue) popFront() *location { + if q.elems == 0 { + return nil + } + loc := q.locs[q.head] + q.head = q.wrap(q.head + 1) + q.elems-- + return loc +} + +// grow doubles the capacity. +func (q *queue) grow() { + newLocs := make([]*location, len(q.locs)*2) + for i := range q.elems { + // Copy over our elements in order. + newLocs[i] = q.locs[q.wrap(q.head+i)] + } + q.locs = newLocs + q.head = 0 + q.tail = q.elems +} + +func (q *queue) len() int { return q.elems } +func (q *queue) wrap(i int) int { return i & (len(q.locs) - 1) }