]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: match more patterns for shortcircuit
authorJunyang Shao <shaojunyang@google.com>
Sun, 9 Mar 2025 20:14:32 +0000 (20:14 +0000)
committerGopher Robot <gobot@golang.org>
Thu, 27 Mar 2025 19:30:03 +0000 (12:30 -0700)
This CL tries to generalize the pattern matching of certain
shortcircuit-able CFGs a bit more:
For a shortcircuit-able CFG:
p   q
 \ /
  b
 / \
t   u
Where the constant branch is t, and b has multiple phi values
other than the control phi.

For the non-constant branch target u, we try to match the
"diamond" shape CFG:
p   q
 \ /
  b
 / \
t   u
 \ /
  m

or

p   q
 \ /
  b
  |\
  | u
  |/
  m

Instead of matching u as a single block, we know try to
generalize it as a subgraph that satisfy condition:
it's a DAG that has a single entry point u, and has a
path to m.

compilebench stats:
                         │   old.txt   │              new.txt               │
                         │   sec/op    │   sec/op     vs base               │
Template                   109.4m ± 3%   109.8m ± 3%       ~ (p=0.796 n=10)
Unicode                    94.23m ± 1%   93.85m ± 1%       ~ (p=0.631 n=10)
GoTypes                    538.2m ± 1%   538.8m ± 1%       ~ (p=0.912 n=10)
Compiler                   90.21m ± 1%   90.02m ± 1%       ~ (p=0.436 n=10)
SSA                         3.318 ± 1%    3.323 ± 1%       ~ (p=1.000 n=10)
Flate                      69.38m ± 1%   69.57m ± 2%       ~ (p=0.529 n=10)
GoParser                   128.5m ± 1%   127.4m ± 1%       ~ (p=0.075 n=10)
Reflect                    267.4m ± 1%   267.2m ± 2%       ~ (p=0.739 n=10)
Tar                        127.7m ± 2%   126.4m ± 1%       ~ (p=0.353 n=10)
XML                        149.5m ± 1%   149.6m ± 2%       ~ (p=0.684 n=10)
LinkCompiler               390.0m ± 1%   388.4m ± 2%       ~ (p=0.353 n=10)
ExternalLinkCompiler        1.296 ± 0%    1.296 ± 1%       ~ (p=0.971 n=10)
LinkWithoutDebugCompiler   226.3m ± 1%   225.5m ± 1%       ~ (p=0.393 n=10)
StdCmd                      13.26 ± 0%    13.25 ± 1%       ~ (p=0.529 n=10)
geomean                    319.3m        318.8m       -0.17%

                         │   old.txt   │               new.txt               │
                         │ user-sec/op │ user-sec/op   vs base               │
Template                   293.1m ± 3%   291.4m ± 11%       ~ (p=0.436 n=10)
Unicode                    91.09m ± 5%   87.61m ±  7%       ~ (p=0.165 n=10)
GoTypes                     1.932 ± 3%    1.926 ±  3%       ~ (p=0.739 n=10)
Compiler                   125.8m ± 3%   121.5m ± 10%       ~ (p=0.481 n=10)
SSA                         18.93 ± 3%    18.89 ±  1%       ~ (p=0.684 n=10)
Flate                      158.5m ± 5%   160.0m ±  7%       ~ (p=0.971 n=10)
GoParser                   316.0m ± 9%   327.4m ±  7%       ~ (p=0.052 n=10)
Reflect                    845.6m ± 6%   861.6m ±  3%       ~ (p=0.579 n=10)
Tar                        358.1m ± 5%   348.5m ±  4%       ~ (p=0.089 n=10)
XML                        382.4m ± 4%   392.2m ±  3%       ~ (p=0.143 n=10)
LinkCompiler               609.1m ± 4%   627.9m ±  3%       ~ (p=0.123 n=10)
ExternalLinkCompiler        1.336 ± 2%    1.343 ±  4%       ~ (p=0.565 n=10)
LinkWithoutDebugCompiler   248.7m ± 3%   248.0m ±  1%       ~ (p=0.853 n=10)
geomean                    506.4m        506.8m        +0.08%

          │   old.txt    │               new.txt               │
          │  text-bytes  │  text-bytes   vs base               │
HelloSize   965.8Ki ± 0%   965.0Ki ± 0%  -0.08% (p=0.000 n=10)
CmdGoSize   12.30Mi ± 0%   12.29Mi ± 0%  -0.08% (p=0.000 n=10)
geomean     3.406Mi        3.403Mi       -0.08%

          │   old.txt    │                new.txt                │
          │  data-bytes  │  data-bytes   vs base                 │
HelloSize   15.08Ki ± 0%   15.08Ki ± 0%       ~ (p=1.000 n=10) ¹
CmdGoSize   408.5Ki ± 0%   408.5Ki ± 0%       ~ (p=1.000 n=10) ¹
geomean     78.49Ki        78.49Ki       +0.00%
¹ all samples are equal

          │   old.txt    │                new.txt                │
          │  bss-bytes   │  bss-bytes    vs base                 │
HelloSize   142.0Ki ± 0%   142.0Ki ± 0%       ~ (p=1.000 n=10) ¹
CmdGoSize   206.4Ki ± 0%   206.4Ki ± 0%       ~ (p=1.000 n=10) ¹
geomean     171.2Ki        171.2Ki       +0.00%
¹ all samples are equal

          │   old.txt    │               new.txt               │
          │  exe-bytes   │  exe-bytes    vs base               │
HelloSize   1.466Mi ± 0%   1.462Mi ± 0%  -0.27% (p=0.000 n=10)
CmdGoSize   18.19Mi ± 0%   18.17Mi ± 0%  -0.10% (p=0.000 n=10)
geomean     5.164Mi        5.154Mi       -0.18%

Fixes #72132

Change-Id: I3d1cb10b6a158c5750adc23c79709d63dbd771f3
Reviewed-on: https://go-review.googlesource.com/c/go/+/656255
Auto-Submit: Junyang Shao <shaojunyang@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
src/cmd/compile/internal/ssa/shortcircuit.go

index d7d0b6fe3335b6218f1d0789fee680fb54e2906b..b86596026b76eaf92ca82ea0a169a32105ca3655 100644 (file)
@@ -135,6 +135,8 @@ func shortcircuitBlock(b *Block) bool {
                // to reason about the values of phis.
                return false
        }
+       // We only process blocks with only phi values except for control
+       // value and its wrappers.
        if len(b.Values) != nval+nOtherPhi {
                return false
        }
@@ -293,78 +295,85 @@ func shortcircuitPhiPlan(b *Block, ctl *Value, cidx int, ti int64) func(*Value,
        // In these cases, we can reconstruct what the value
        // of any phi in b must be in the successor blocks.
 
-       if len(t.Preds) == 1 && len(t.Succs) == 1 &&
-               len(u.Preds) == 1 && len(u.Succs) == 1 &&
-               t.Succs[0].b == u.Succs[0].b && len(t.Succs[0].b.Preds) == 2 {
-               // p   q
-               //  \ /
-               //   b
-               //  / \
-               // t   u
-               //  \ /
-               //   m
-               //
-               // After the CFG modifications, this will look like
-               //
-               // p   q
-               // |  /
-               // | b
-               // |/ \
-               // t   u
-               //  \ /
-               //   m
-               //
-               // NB: t.Preds is (b, p), not (p, b).
+       if len(t.Preds) == 1 && len(t.Succs) == 1 && len(u.Preds) == 1 &&
+               len(t.Succs[0].b.Preds) == 2 {
                m := t.Succs[0].b
-               return func(v *Value, i int) {
-                       // Replace any uses of v in t and u with the value v must have,
-                       // given that we have arrived at that block.
-                       // Then move v to m and adjust its value accordingly;
-                       // this handles all other uses of v.
-                       argP, argQ := v.Args[cidx], v.Args[1^cidx]
-                       u.replaceUses(v, argQ)
-                       phi := t.Func.newValue(OpPhi, v.Type, t, v.Pos)
-                       phi.AddArg2(argQ, argP)
-                       t.replaceUses(v, phi)
-                       if v.Uses == 0 {
-                               return
-                       }
-                       v.moveTo(m, i)
-                       // The phi in m belongs to whichever pred idx corresponds to t.
-                       if m.Preds[0].b == t {
-                               v.SetArgs2(phi, argQ)
-                       } else {
-                               v.SetArgs2(argQ, phi)
+               if visited := u.flowsTo(m, 5); visited != nil {
+                       // p   q
+                       //  \ /
+                       //   b
+                       //  / \
+                       // t   U (sub graph that satisfy condition in flowsTo)
+                       //  \ /
+                       //   m
+                       //
+                       // After the CFG modifications, this will look like
+                       //
+                       // p   q
+                       // |  /
+                       // | b
+                       // |/ \
+                       // t   U
+                       //  \ /
+                       //   m
+                       //
+                       // NB: t.Preds is (b, p), not (p, b).
+                       return func(v *Value, i int) {
+                               // Replace any uses of v in t and u with the value v must have,
+                               // given that we have arrived at that block.
+                               // Then move v to m and adjust its value accordingly;
+                               // this handles all other uses of v.
+                               argP, argQ := v.Args[cidx], v.Args[1^cidx]
+                               phi := t.Func.newValue(OpPhi, v.Type, t, v.Pos)
+                               phi.AddArg2(argQ, argP)
+                               t.replaceUses(v, phi)
+                               for bb := range visited {
+                                       bb.replaceUses(v, argQ)
+                               }
+                               if v.Uses == 0 {
+                                       return
+                               }
+                               v.moveTo(m, i)
+                               // The phi in m belongs to whichever pred idx corresponds to t.
+                               if m.Preds[0].b == t {
+                                       v.SetArgs2(phi, argQ)
+                               } else {
+                                       v.SetArgs2(argQ, phi)
+                               }
                        }
                }
        }
 
-       if len(t.Preds) == 2 && len(u.Preds) == 1 && len(u.Succs) == 1 && u.Succs[0].b == t {
-               // p   q
-               //  \ /
-               //   b
-               //   |\
-               //   | u
-               //   |/
-               //   t
-               //
-               // After the CFG modifications, this will look like
-               //
-               //     q
-               //    /
-               //   b
-               //   |\
-               // p | u
-               //  \|/
-               //   t
-               //
-               // NB: t.Preds is (b or u, b or u, p).
-               return func(v *Value, i int) {
-                       // Replace any uses of v in u. Then move v to t.
-                       argP, argQ := v.Args[cidx], v.Args[1^cidx]
-                       u.replaceUses(v, argQ)
-                       v.moveTo(t, i)
-                       v.SetArgs3(argQ, argQ, argP)
+       if len(t.Preds) == 2 && len(u.Preds) == 1 {
+               if visited := u.flowsTo(t, 5); visited != nil {
+                       // p   q
+                       //  \ /
+                       //   b
+                       //   |\
+                       //   | U ((sub graph that satisfy condition in flowsTo))
+                       //   |/
+                       //   t
+                       //
+                       // After the CFG modifications, this will look like
+                       //
+                       //     q
+                       //    /
+                       //   b
+                       //   |\
+                       // p | U
+                       //  \|/
+                       //   t
+                       //
+                       // NB: t.Preds is (b or U, b or U, p).
+                       return func(v *Value, i int) {
+                               // Replace any uses of v in U. Then move v to t.
+                               argP, argQ := v.Args[cidx], v.Args[1^cidx]
+                               for bb := range visited {
+                                       bb.replaceUses(v, argQ)
+                               }
+                               v.moveTo(t, i)
+                               v.SetArgs3(argQ, argQ, argP)
+                       }
                }
        }
 
@@ -511,3 +520,77 @@ func (v *Value) moveTo(dst *Block, i int) {
        src.Values[last] = nil
        src.Values = src.Values[:last]
 }
+
+// flowsTo checks that the subgraph starting from v and ends at t is a DAG, with
+// the following constraints:
+//
+//     (1) v can reach t.
+//     (2) v's connected component removing the paths containing t is a DAG.
+//     (3) The blocks in the subgraph G defined in (2) has all their preds also in G,
+//         except v.
+//     (4) The subgraph defined in (2) has a size smaller than cap.
+//
+//     We know that the subgraph G defined in constraint (2)(3) has the property that v
+//     dominates all the blocks in G:
+//             If there exist a block x in G that is not dominated by v, then there exist a
+//             path P from entry to x that does not contain v. Denote x's predecessor in P
+//             as x', then x' must also be in G given constraint (3), same to its pred x''
+//             in P. Given constraint (2), by going back in P we will in the end reach v,
+//             which conflicts with the definition of P.
+//
+// Constraint (2)'s DAG requirement could be further relaxed to contain "internal"
+// loops that doesn't change the dominance relation of v. But that is more subtle
+// and requires another constraint on the source block v, and a more complex proof.
+// Furthermore optimizing the branch guarding a loop might bring less gains as the
+// loop itself might be the bottleneck.
+func (v *Block) flowsTo(t *Block, cap int) map[*Block]struct{} {
+       seen := map[*Block]struct{}{}
+       var boundedDFS func(b *Block)
+       hasPathToT := false
+       fullyExplored := true
+       isDAG := true
+       visited := map[*Block]struct{}{}
+       boundedDFS = func(b *Block) {
+               if _, ok := seen[b]; ok {
+                       return
+               }
+               if _, ok := visited[b]; ok {
+                       isDAG = false
+                       return
+               }
+               if b == t {
+                       // do not put t into seen, this way
+                       // if v can reach t's connected component without going through t,
+                       // it will fail the pred check after boundedDFSUntil.
+                       hasPathToT = true
+                       return
+               }
+               if len(seen) > cap {
+                       fullyExplored = false
+                       return
+               }
+               seen[b] = struct{}{}
+               visited[b] = struct{}{}
+               for _, se := range b.Succs {
+                       boundedDFS(se.b)
+                       if !(isDAG && fullyExplored) {
+                               return
+                       }
+               }
+               delete(visited, b)
+       }
+       boundedDFS(v)
+       if hasPathToT && fullyExplored && isDAG {
+               for b := range seen {
+                       if b != v {
+                               for _, se := range b.Preds {
+                                       if _, ok := seen[se.b]; !ok {
+                                               return nil
+                                       }
+                               }
+                       }
+               }
+               return seen
+       }
+       return nil
+}