Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: improve loopRotate to handle nested loops
author Alexander Musman <alexander.musman@gmail.com>
Tue, 13 May 2025 06:44:17 +0000 (09:44 +0300)
committer Gopher Robot <gobot@golang.org>
Thu, 24 Jul 2025 19:40:00 +0000 (12:40 -0700)
Enhance loop rotation of nested loops. Currently, loops are processed independently,
resulting in unnecessary jumps between outer and inner loops. By processing inner
loops before their parent loop, we ensure nested loop blocks are
properly placed within their parent loop's block sequence.

There is some code size improvement (as measured on amd64) because jumps
to/from inner loops are removed by the updated loopRotate block order:

Executable            Old .text  New .text     Change
-------------------------------------------------------
asm                     2147569    2146481     -0.05%
cgo                     1977457    1975761     -0.09%
compile                10447345   10441905     -0.05%
cover                   2110097    2108977     -0.05%
link                    2930289    2929041     -0.04%
preprofile               927345     926769     -0.06%
vet                     3279057    3277009     -0.06%

Change-Id: I4b9e993c2be07fad735e6bcf32d062d099d9cfb5
Reviewed-on: https://go-review.googlesource.com/c/go/+/684335
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/cmd/compile/internal/ssa/looprotate.go
src/cmd/compile/internal/ssa/looprotate_test.go [new file with mode: 0644]

index f32125576f56ada3beae01454b8e083b6decdb4b..2b9cc6799061278919d6e695e849e0e9eadbb8f5 100644 (file)
@@ -4,6 +4,10 @@
 
 package ssa
 
+import (
+       "slices"
+)
+
 // loopRotate converts loops with a check-loop-condition-at-beginning
 // to loops with a check-loop-condition-at-end.
 // This helps loops avoid extra unnecessary jumps.
@@ -41,10 +45,65 @@ func loopRotate(f *Func) {
 
        // Map from block ID to the moving blocks that should
        // come right after it.
+       // If a block whose ID is a key of the 'after' map also occurs in
+       // some other block's 'after' list, it stands for a whole nested
+       // loop. E.g. consider an inner loop I nested in an outer loop O.
+       // It and Ot are the top blocks chosen by our algorithm for these
+       // loops, and It is in Ot's 'after' list.
+       //
+       //    Before:                     After:
+       //
+       //       e                       e
+       //       │                       │
+       //       │                       │Ot ◄───┐
+       //       ▼                       ▼▼      │
+       //   ┌───Oh ◄────┐           ┌─┬─Oh      │
+       //   │   │       │           │ │         │
+       //   │   │       │           │ │ It◄───┐ │
+       //   │   ▼       │           │ │ ▼     │ │
+       //   │ ┌─Ih◄───┐ │           │ └►Ih    │ │
+       //   │ │ │     │ │           │ ┌─┤     │ │
+       //   │ │ ▼     │ │           │ │ ▼     │ │
+       //   │ │ Ib    │ │           │ │ Ib    │ │
+       //   │ │ └─►It─┘ │           │ │ └─────┘ │
+       //   │ │         │           │ │         │
+       //   │ └►Ie      │           │ └►Ie      │
+       //   │   └─►Ot───┘           │   └───────┘
+       //   │                       │
+       //   └──►Oe                  └──►Oe
+       //
+       // We build the 'after' lists for each of the top blocks Ot and It:
+       //   after[Ot]: Oh, It, Ie
+       //   after[It]: Ih, Ib
        after := map[ID][]*Block{}
 
+       // Map from loop header ID to the new top block for the loop.
+       tops := map[ID]*Block{}
+
+       // Order loops to rotate any child loop before adding its top block
+       // to the parent loop's 'after' list.
+       loopnest.calculateDepths()
+       loopOrder := f.Cache.allocIntSlice(len(loopnest.loops))
+       for i := range loopOrder {
+               loopOrder[i] = i
+       }
+       defer f.Cache.freeIntSlice(loopOrder)
+       slices.SortFunc(loopOrder, func(i, j int) int {
+               di := loopnest.loops[i].depth
+               dj := loopnest.loops[j].depth
+               switch {
+               case di > dj:
+                       return -1
+               case di < dj:
+                       return 1
+               default:
+                       return 0
+               }
+       })
+
        // Check each loop header and decide if we want to move it.
-       for _, loop := range loopnest.loops {
+       for _, loopIdx := range loopOrder {
+               loop := loopnest.loops[loopIdx]
                b := loop.header
                var p *Block // b's in-loop predecessor
                for _, e := range b.Preds {
@@ -59,6 +118,7 @@ func loopRotate(f *Func) {
                if p == nil {
                        continue
                }
+               tops[loop.header.ID] = p
                p.Hotness |= HotInitial
                if f.IsPgoHot {
                        p.Hotness |= HotPgo
@@ -80,8 +140,10 @@ func loopRotate(f *Func) {
                        if nextb == p { // original loop predecessor is next
                                break
                        }
-                       if loopnest.b2l[nextb.ID] == loop {
-                               after[p.ID] = append(after[p.ID], nextb)
+                       if bloop := loopnest.b2l[nextb.ID]; bloop != nil {
+                               if bloop == loop || bloop.outer == loop && tops[bloop.header.ID] == nextb {
+                                       after[p.ID] = append(after[p.ID], nextb)
+                               }
                        }
                        b = nextb
                }
@@ -90,7 +152,7 @@ func loopRotate(f *Func) {
                f.Blocks[idToIdx[p.ID]] = loop.header
                idToIdx[loop.header.ID], idToIdx[p.ID] = idToIdx[p.ID], idToIdx[loop.header.ID]
 
-               // Place b after p.
+               // Place loop blocks after p.
                for _, b := range after[p.ID] {
                        move[b.ID] = struct{}{}
                }
@@ -107,16 +169,23 @@ func loopRotate(f *Func) {
        oldOrder := f.Cache.allocBlockSlice(len(f.Blocks))
        defer f.Cache.freeBlockSlice(oldOrder)
        copy(oldOrder, f.Blocks)
+       var moveBlocks func(bs []*Block)
+       moveBlocks = func(blocks []*Block) {
+               for _, a := range blocks {
+                       f.Blocks[j] = a
+                       j++
+                       if nextBlocks, ok := after[a.ID]; ok {
+                               moveBlocks(nextBlocks)
+                       }
+               }
+       }
        for _, b := range oldOrder {
                if _, ok := move[b.ID]; ok {
                        continue
                }
                f.Blocks[j] = b
                j++
-               for _, a := range after[b.ID] {
-                       f.Blocks[j] = a
-                       j++
-               }
+               moveBlocks(after[b.ID])
        }
        if j != len(oldOrder) {
                f.Fatalf("bad reordering in looprotate")
diff --git a/src/cmd/compile/internal/ssa/looprotate_test.go b/src/cmd/compile/internal/ssa/looprotate_test.go
new file mode 100644 (file)
index 0000000..8e7cfc3
--- /dev/null
@@ -0,0 +1,65 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+import (
+       "cmd/compile/internal/types"
+       "testing"
+)
+
+func TestLoopRotateNested(t *testing.T) {
+       c := testConfig(t)
+       fun := c.Fun("entry",
+               Bloc("entry",
+                       Valu("mem", OpInitMem, types.TypeMem, 0, nil),
+                       Valu("constTrue", OpConstBool, types.Types[types.TBOOL], 1, nil),
+                       Goto("outerHeader")),
+               Bloc("outerHeader",
+                       If("constTrue", "outerBody", "outerExit")),
+               Bloc("outerBody",
+                       Goto("innerHeader")),
+               Bloc("innerHeader",
+                       If("constTrue", "innerBody", "innerExit")),
+               Bloc("innerBody",
+                       Goto("innerTop")),
+               Bloc("innerTop",
+                       Goto("innerHeader")),
+               Bloc("innerExit",
+                       Goto("outerTop")),
+               Bloc("outerTop",
+                       Goto("outerHeader")),
+               Bloc("outerExit",
+                       Exit("mem")))
+
+       blockName := make([]string, len(fun.f.Blocks)+1)
+       for name, block := range fun.blocks {
+               blockName[block.ID] = name
+       }
+
+       CheckFunc(fun.f)
+       loopRotate(fun.f)
+       CheckFunc(fun.f)
+
+       // Verify the resulting block order
+       expected := []string{
+               "entry",
+               "outerTop",
+               "outerHeader",
+               "outerBody",
+               "innerTop",
+               "innerHeader",
+               "innerBody",
+               "innerExit",
+               "outerExit",
+       }
+       if len(expected) != len(fun.f.Blocks) {
+               t.Fatalf("expected %d blocks, found %d", len(expected), len(fun.f.Blocks))
+       }
+       for i, b := range fun.f.Blocks {
+               if expected[i] != blockName[b.ID] {
+                       t.Errorf("position %d: expected %s, found %s", i, expected[i], blockName[b.ID])
+               }
+       }
+}