]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: fuse before branchelim
authorJosh Bleecher Snyder <josharian@gmail.com>
Sun, 27 May 2018 16:03:45 +0000 (09:03 -0700)
committerJosh Bleecher Snyder <josharian@gmail.com>
Mon, 15 Oct 2018 16:51:08 +0000 (16:51 +0000)
The branchelim pass works better after fuse.
Running fuse before branchelim also increases
the stability of generated code amidst other compiler changes,
which was the original motivation behind this change.

The fuse pass is not cheap enough to run in its entirety
before branchelim, but the most important half of it is.
This change makes it possible to run "plain fuse" independently
and does so before branchelim.

During make.bash, elimIf occurrences increase from 4244 to 4288 (1%),
and elimIfElse occurrences increase from 989 to 1079 (9%).

Toolspeed impact is marginal; plain fuse pays for itself.

name        old time/op       new time/op       delta
Template          189ms ± 2%        189ms ± 2%    ~     (p=0.890 n=45+46)
Unicode          93.2ms ± 5%       93.4ms ± 7%    ~     (p=0.790 n=48+48)
GoTypes           662ms ± 4%        660ms ± 4%    ~     (p=0.186 n=48+49)
Compiler          2.89s ± 4%        2.91s ± 3%  +0.89%  (p=0.050 n=49+44)
SSA               8.23s ± 2%        8.21s ± 1%    ~     (p=0.165 n=46+44)
Flate             123ms ± 4%        123ms ± 3%  +0.58%  (p=0.031 n=47+49)
GoParser          154ms ± 4%        154ms ± 4%    ~     (p=0.492 n=49+48)
Reflect           430ms ± 4%        429ms ± 4%    ~     (p=1.000 n=48+48)
Tar               171ms ± 3%        170ms ± 4%    ~     (p=0.122 n=48+48)
XML               232ms ± 3%        232ms ± 2%    ~     (p=0.850 n=46+49)
[Geo mean]        394ms             394ms       +0.02%

name        old user-time/op  new user-time/op  delta
Template          236ms ± 5%        236ms ± 4%    ~     (p=0.934 n=50+50)
Unicode           132ms ± 7%        130ms ± 9%    ~     (p=0.087 n=50+50)
GoTypes           861ms ± 3%        867ms ± 4%    ~     (p=0.124 n=48+50)
Compiler          3.93s ± 4%        3.94s ± 3%    ~     (p=0.584 n=49+44)
SSA               12.2s ± 2%        12.3s ± 1%    ~     (p=0.610 n=46+45)
Flate             149ms ± 4%        150ms ± 4%    ~     (p=0.194 n=48+49)
GoParser          193ms ± 5%        191ms ± 6%    ~     (p=0.239 n=49+50)
Reflect           553ms ± 5%        556ms ± 5%    ~     (p=0.091 n=49+49)
Tar               218ms ± 5%        218ms ± 5%    ~     (p=0.359 n=49+50)
XML               299ms ± 5%        298ms ± 4%    ~     (p=0.482 n=50+49)
[Geo mean]        516ms             516ms       -0.01%

name        old alloc/op      new alloc/op      delta
Template         36.3MB ± 0%       36.3MB ± 0%  -0.02%  (p=0.000 n=49+49)
Unicode          29.7MB ± 0%       29.7MB ± 0%    ~     (p=0.270 n=50+50)
GoTypes           126MB ± 0%        126MB ± 0%  -0.34%  (p=0.000 n=50+49)
Compiler          534MB ± 0%        531MB ± 0%  -0.50%  (p=0.000 n=50+50)
SSA              1.98GB ± 0%       1.98GB ± 0%  -0.06%  (p=0.000 n=49+49)
Flate            24.6MB ± 0%       24.6MB ± 0%  -0.29%  (p=0.000 n=50+50)
GoParser         29.5MB ± 0%       29.4MB ± 0%  -0.15%  (p=0.000 n=49+50)
Reflect          87.3MB ± 0%       87.2MB ± 0%  -0.13%  (p=0.000 n=49+50)
Tar              35.6MB ± 0%       35.5MB ± 0%  -0.17%  (p=0.000 n=50+50)
XML              48.2MB ± 0%       48.0MB ± 0%  -0.30%  (p=0.000 n=48+50)
[Geo mean]       83.1MB            82.9MB       -0.20%

name        old allocs/op     new allocs/op     delta
Template           352k ± 0%         352k ± 0%  -0.01%  (p=0.004 n=49+49)
Unicode            341k ± 0%         341k ± 0%    ~     (p=0.341 n=48+50)
GoTypes           1.28M ± 0%        1.28M ± 0%  -0.03%  (p=0.000 n=50+49)
Compiler          4.96M ± 0%        4.96M ± 0%  -0.05%  (p=0.000 n=50+49)
SSA               15.5M ± 0%        15.5M ± 0%  -0.01%  (p=0.000 n=50+49)
Flate              233k ± 0%         233k ± 0%  +0.01%  (p=0.032 n=49+49)
GoParser           294k ± 0%         294k ± 0%    ~     (p=0.052 n=46+48)
Reflect           1.04M ± 0%        1.04M ± 0%    ~     (p=0.171 n=50+47)
Tar                343k ± 0%         343k ± 0%  -0.03%  (p=0.000 n=50+50)
XML                429k ± 0%         429k ± 0%  -0.04%  (p=0.000 n=50+50)
[Geo mean]         812k              812k       -0.02%

Object files grow slightly; branchelim often increases binary size, at least on amd64.

name        old object-bytes  new object-bytes  delta
Template          509kB ± 0%        509kB ± 0%  -0.01%  (p=0.008 n=5+5)
Unicode           224kB ± 0%        224kB ± 0%    ~     (all equal)
GoTypes          1.84MB ± 0%       1.84MB ± 0%  +0.00%  (p=0.008 n=5+5)
Compiler         6.71MB ± 0%       6.71MB ± 0%  +0.01%  (p=0.008 n=5+5)
SSA              21.2MB ± 0%       21.2MB ± 0%  +0.01%  (p=0.008 n=5+5)
Flate             324kB ± 0%        324kB ± 0%  -0.00%  (p=0.008 n=5+5)
GoParser          404kB ± 0%        404kB ± 0%  -0.02%  (p=0.008 n=5+5)
Reflect          1.40MB ± 0%       1.40MB ± 0%  +0.09%  (p=0.008 n=5+5)
Tar               452kB ± 0%        452kB ± 0%  +0.06%  (p=0.008 n=5+5)
XML               596kB ± 0%        596kB ± 0%  +0.00%  (p=0.008 n=5+5)
[Geo mean]       1.04MB            1.04MB       +0.01%

Change-Id: I535c711b85380ff657fc0f022bebd9cb14ddd07f
Reviewed-on: https://go-review.googlesource.com/c/129378
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/ssa/compile.go
src/cmd/compile/internal/ssa/fuse.go
src/cmd/compile/internal/ssa/fuse_test.go
src/cmd/compile/internal/ssa/nilcheck_test.go

index 8b5d6d94e8d4610bcbc3f103f2fd130ac8ba4d48..7f933cb66e0ea7be45670d1283329a2a58850b36 100644 (file)
@@ -373,6 +373,7 @@ var passes = [...]pass{
        {name: "phiopt", fn: phiopt},
        {name: "nilcheckelim", fn: nilcheckelim},
        {name: "prove", fn: prove},
+       {name: "fuse plain", fn: fusePlain},
        {name: "decompose builtin", fn: decomposeBuiltIn, required: true},
        {name: "softfloat", fn: softfloat, required: true},
        {name: "late opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules
@@ -380,7 +381,7 @@ var passes = [...]pass{
        {name: "generic deadcode", fn: deadcode, required: true}, // remove dead stores, which otherwise mess up store chain
        {name: "check bce", fn: checkbce},
        {name: "branchelim", fn: branchelim},
-       {name: "fuse", fn: fuse},
+       {name: "fuse", fn: fuseAll},
        {name: "dse", fn: dse},
        {name: "writebarrier", fn: writebarrier, required: true}, // expand write barrier ops
        {name: "insert resched checks", fn: insertLoopReschedChecks,
index 4f9a2ad9cab78e71e61bed0925e418b05208d393..c451904124bf40b76412d8e6b2168e981a052f9b 100644 (file)
@@ -8,15 +8,33 @@ import (
        "cmd/internal/src"
 )
 
+// fusePlain runs fuse(f, fuseTypePlain).
+func fusePlain(f *Func) { fuse(f, fuseTypePlain) }
+
+// fuseAll runs fuse(f, fuseTypeAll).
+func fuseAll(f *Func) { fuse(f, fuseTypeAll) }
+
+type fuseType uint8
+
+const (
+       fuseTypePlain fuseType = 1 << iota
+       fuseTypeIf
+       fuseTypeAll = fuseTypePlain | fuseTypeIf
+)
+
 // fuse simplifies control flow by joining basic blocks.
-func fuse(f *Func) {
+func fuse(f *Func, typ fuseType) {
        for changed := true; changed; {
                changed = false
                // Fuse from end to beginning, to avoid quadratic behavior in fuseBlockPlain. See issue 13554.
                for i := len(f.Blocks) - 1; i >= 0; i-- {
                        b := f.Blocks[i]
-                       changed = fuseBlockIf(b) || changed
-                       changed = fuseBlockPlain(b) || changed
+                       if typ&fuseTypeIf != 0 {
+                               changed = fuseBlockIf(b) || changed
+                       }
+                       if typ&fuseTypePlain != 0 {
+                               changed = fuseBlockPlain(b) || changed
+                       }
                }
        }
 }
index bba92f805ea7160362f746c301239e91ef8d32ef..c3e25a80c4b022e0b9910f856107f28bcc4d1bd9 100644 (file)
@@ -26,7 +26,7 @@ func TestFuseEliminatesOneBranch(t *testing.T) {
                        Exit("mem")))
 
        CheckFunc(fun.f)
-       fuse(fun.f)
+       fuseAll(fun.f)
 
        for _, b := range fun.f.Blocks {
                if b == fun.blocks["then"] && b.Kind != BlockInvalid {
@@ -56,7 +56,7 @@ func TestFuseEliminatesBothBranches(t *testing.T) {
                        Exit("mem")))
 
        CheckFunc(fun.f)
-       fuse(fun.f)
+       fuseAll(fun.f)
 
        for _, b := range fun.f.Blocks {
                if b == fun.blocks["then"] && b.Kind != BlockInvalid {
@@ -90,7 +90,7 @@ func TestFuseHandlesPhis(t *testing.T) {
                        Exit("mem")))
 
        CheckFunc(fun.f)
-       fuse(fun.f)
+       fuseAll(fun.f)
 
        for _, b := range fun.f.Blocks {
                if b == fun.blocks["then"] && b.Kind != BlockInvalid {
@@ -122,7 +122,7 @@ func TestFuseEliminatesEmptyBlocks(t *testing.T) {
                ))
 
        CheckFunc(fun.f)
-       fuse(fun.f)
+       fuseAll(fun.f)
 
        for k, b := range fun.blocks {
                if k[:1] == "z" && b.Kind != BlockInvalid {
@@ -162,7 +162,7 @@ func BenchmarkFuse(b *testing.B) {
                        b.ResetTimer()
                        for i := 0; i < b.N; i++ {
                                fun := c.Fun("entry", blocks...)
-                               fuse(fun.f)
+                               fuseAll(fun.f)
                        }
                })
        }
index 815c4a5047289f9701a59558a49b11297ccc2959..b2f5cae08818f1b5fc5d4235a2821b7db7c310d4 100644 (file)
@@ -87,7 +87,7 @@ func TestNilcheckSimple(t *testing.T) {
        nilcheckelim(fun.f)
 
        // clean up the removed nil check
-       fuse(fun.f)
+       fusePlain(fun.f)
        deadcode(fun.f)
 
        CheckFunc(fun.f)
@@ -124,7 +124,7 @@ func TestNilcheckDomOrder(t *testing.T) {
        nilcheckelim(fun.f)
 
        // clean up the removed nil check
-       fuse(fun.f)
+       fusePlain(fun.f)
        deadcode(fun.f)
 
        CheckFunc(fun.f)
@@ -157,7 +157,7 @@ func TestNilcheckAddr(t *testing.T) {
        nilcheckelim(fun.f)
 
        // clean up the removed nil check
-       fuse(fun.f)
+       fusePlain(fun.f)
        deadcode(fun.f)
 
        CheckFunc(fun.f)
@@ -191,7 +191,7 @@ func TestNilcheckAddPtr(t *testing.T) {
        nilcheckelim(fun.f)
 
        // clean up the removed nil check
-       fuse(fun.f)
+       fusePlain(fun.f)
        deadcode(fun.f)
 
        CheckFunc(fun.f)
@@ -235,7 +235,7 @@ func TestNilcheckPhi(t *testing.T) {
        nilcheckelim(fun.f)
 
        // clean up the removed nil check
-       fuse(fun.f)
+       fusePlain(fun.f)
        deadcode(fun.f)
 
        CheckFunc(fun.f)
@@ -276,7 +276,7 @@ func TestNilcheckKeepRemove(t *testing.T) {
        nilcheckelim(fun.f)
 
        // clean up the removed nil check
-       fuse(fun.f)
+       fusePlain(fun.f)
        deadcode(fun.f)
 
        CheckFunc(fun.f)
@@ -323,7 +323,7 @@ func TestNilcheckInFalseBranch(t *testing.T) {
        nilcheckelim(fun.f)
 
        // clean up the removed nil check
-       fuse(fun.f)
+       fusePlain(fun.f)
        deadcode(fun.f)
 
        CheckFunc(fun.f)
@@ -374,7 +374,7 @@ func TestNilcheckUser(t *testing.T) {
        nilcheckelim(fun.f)
 
        // clean up the removed nil check
-       fuse(fun.f)
+       fusePlain(fun.f)
        deadcode(fun.f)
 
        CheckFunc(fun.f)
@@ -418,7 +418,7 @@ func TestNilcheckBug(t *testing.T) {
        nilcheckelim(fun.f)
 
        // clean up the removed nil check
-       fuse(fun.f)
+       fusePlain(fun.f)
        deadcode(fun.f)
 
        CheckFunc(fun.f)