From: Alexander Musman Date: Tue, 13 May 2025 06:44:17 +0000 (+0300) Subject: cmd/compile: improve loopRotate to handle nested loops X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=592c2db868c7465ae06a447a861c313ba071f3e6;p=gostls13.git cmd/compile: improve loopRotate to handle nested loops Enhance loop rotation of nested loops. Currently, loops are processed independently, resulting in unnecessary jumps between outer and inner loops. By processing inner loops before their parent loop, we ensure nested loop blocks are properly placed within their parent loop's block sequence. There is some code size improvement (as measured on amd64) because jumps to/from inner loops are removed by the updated loopRotate block order: Executable Old .text New .text Change ------------------------------------------------------- asm 2147569 2146481 -0.05% cgo 1977457 1975761 -0.09% compile 10447345 10441905 -0.05% cover 2110097 2108977 -0.05% link 2930289 2929041 -0.04% preprofile 927345 926769 -0.06% vet 3279057 3277009 -0.06% Change-Id: I4b9e993c2be07fad735e6bcf32d062d099d9cfb5 Reviewed-on: https://go-review.googlesource.com/c/go/+/684335 Reviewed-by: Keith Randall Auto-Submit: Keith Randall Reviewed-by: Michael Knyszek Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI --- diff --git a/src/cmd/compile/internal/ssa/looprotate.go b/src/cmd/compile/internal/ssa/looprotate.go index f32125576f..2b9cc67990 100644 --- a/src/cmd/compile/internal/ssa/looprotate.go +++ b/src/cmd/compile/internal/ssa/looprotate.go @@ -4,6 +4,10 @@ package ssa +import ( + "slices" +) + // loopRotate converts loops with a check-loop-condition-at-beginning // to loops with a check-loop-condition-at-end. // This helps loops avoid extra unnecessary jumps. @@ -41,10 +45,65 @@ func loopRotate(f *Func) { // Map from block ID to the moving blocks that should // come right after it. 
+ // If a block, which has its ID present in keys of the 'after' map, + // occurs in some other block's 'after' list, that represents whole + // nested loop, e.g. consider an inner loop I nested into an outer + // loop O. It and Ot are corresponding top block for these loops + // chosen by our algorithm, and It is in the Ot's 'after' list. + // + // Before: After: + // + // e e + // │ │ + // │ │Ot ◄───┐ + // ▼ ▼▼ │ + // ┌───Oh ◄────┐ ┌─┬─Oh │ + // │ │ │ │ │ │ + // │ │ │ │ │ It◄───┐ │ + // │ ▼ │ │ │ ▼ │ │ + // │ ┌─Ih◄───┐ │ │ └►Ih │ │ + // │ │ │ │ │ │ ┌─┤ │ │ + // │ │ ▼ │ │ │ │ ▼ │ │ + // │ │ Ib │ │ │ │ Ib │ │ + // │ │ └─►It─┘ │ │ │ └─────┘ │ + // │ │ │ │ │ │ + // │ └►Ie │ │ └►Ie │ + // │ └─►Ot───┘ │ └───────┘ + // │ │ + // └──►Oe └──►Oe + // + // We build the 'after' lists for each of the top blocks Ot and It: + // after[Ot]: Oh, It, Ie + // after[It]: Ih, Ib after := map[ID][]*Block{} + // Map from loop header ID to the new top block for the loop. + tops := map[ID]*Block{} + + // Order loops to rotate any child loop before adding its top block + // to the parent loop's 'after' list. + loopnest.calculateDepths() + loopOrder := f.Cache.allocIntSlice(len(loopnest.loops)) + for i := range loopOrder { + loopOrder[i] = i + } + defer f.Cache.freeIntSlice(loopOrder) + slices.SortFunc(loopOrder, func(i, j int) int { + di := loopnest.loops[i].depth + dj := loopnest.loops[j].depth + switch { + case di > dj: + return -1 + case di < dj: + return 1 + default: + return 0 + } + }) + // Check each loop header and decide if we want to move it. 
- for _, loop := range loopnest.loops { + for _, loopIdx := range loopOrder { + loop := loopnest.loops[loopIdx] b := loop.header var p *Block // b's in-loop predecessor for _, e := range b.Preds { @@ -59,6 +118,7 @@ func loopRotate(f *Func) { if p == nil { continue } + tops[loop.header.ID] = p p.Hotness |= HotInitial if f.IsPgoHot { p.Hotness |= HotPgo @@ -80,8 +140,10 @@ func loopRotate(f *Func) { if nextb == p { // original loop predecessor is next break } - if loopnest.b2l[nextb.ID] == loop { - after[p.ID] = append(after[p.ID], nextb) + if bloop := loopnest.b2l[nextb.ID]; bloop != nil { + if bloop == loop || bloop.outer == loop && tops[bloop.header.ID] == nextb { + after[p.ID] = append(after[p.ID], nextb) + } } b = nextb } @@ -90,7 +152,7 @@ func loopRotate(f *Func) { f.Blocks[idToIdx[p.ID]] = loop.header idToIdx[loop.header.ID], idToIdx[p.ID] = idToIdx[p.ID], idToIdx[loop.header.ID] - // Place b after p. + // Place loop blocks after p. for _, b := range after[p.ID] { move[b.ID] = struct{}{} } @@ -107,16 +169,23 @@ func loopRotate(f *Func) { oldOrder := f.Cache.allocBlockSlice(len(f.Blocks)) defer f.Cache.freeBlockSlice(oldOrder) copy(oldOrder, f.Blocks) + var moveBlocks func(bs []*Block) + moveBlocks = func(blocks []*Block) { + for _, a := range blocks { + f.Blocks[j] = a + j++ + if nextBlocks, ok := after[a.ID]; ok { + moveBlocks(nextBlocks) + } + } + } for _, b := range oldOrder { if _, ok := move[b.ID]; ok { continue } f.Blocks[j] = b j++ - for _, a := range after[b.ID] { - f.Blocks[j] = a - j++ - } + moveBlocks(after[b.ID]) } if j != len(oldOrder) { f.Fatalf("bad reordering in looprotate") diff --git a/src/cmd/compile/internal/ssa/looprotate_test.go b/src/cmd/compile/internal/ssa/looprotate_test.go new file mode 100644 index 0000000000..8e7cfc343f --- /dev/null +++ b/src/cmd/compile/internal/ssa/looprotate_test.go @@ -0,0 +1,65 @@ +// Copyright 2025 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ssa + +import ( + "cmd/compile/internal/types" + "testing" +) + +func TestLoopRotateNested(t *testing.T) { + c := testConfig(t) + fun := c.Fun("entry", + Bloc("entry", + Valu("mem", OpInitMem, types.TypeMem, 0, nil), + Valu("constTrue", OpConstBool, types.Types[types.TBOOL], 1, nil), + Goto("outerHeader")), + Bloc("outerHeader", + If("constTrue", "outerBody", "outerExit")), + Bloc("outerBody", + Goto("innerHeader")), + Bloc("innerHeader", + If("constTrue", "innerBody", "innerExit")), + Bloc("innerBody", + Goto("innerTop")), + Bloc("innerTop", + Goto("innerHeader")), + Bloc("innerExit", + Goto("outerTop")), + Bloc("outerTop", + Goto("outerHeader")), + Bloc("outerExit", + Exit("mem"))) + + blockName := make([]string, len(fun.f.Blocks)+1) + for name, block := range fun.blocks { + blockName[block.ID] = name + } + + CheckFunc(fun.f) + loopRotate(fun.f) + CheckFunc(fun.f) + + // Verify the resulting block order: each loop's top block must directly precede its header, and the inner loop's blocks (innerTop, innerHeader, innerBody) must sit inside the outer loop's sequence, between outerBody and innerExit. + expected := []string{ + "entry", + "outerTop", + "outerHeader", + "outerBody", + "innerTop", + "innerHeader", + "innerBody", + "innerExit", + "outerExit", + } + if len(expected) != len(fun.f.Blocks) { + t.Fatalf("expected %d blocks, found %d", len(expected), len(fun.f.Blocks)) + } + for i, b := range fun.f.Blocks { + if expected[i] != blockName[b.ID] { + t.Errorf("position %d: expected %s, found %s", i, expected[i], blockName[b.ID]) + } + } +}