]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile/internal/ssa: update regalloc in loops
authorIlya Tocar <ilya.tocar@intel.com>
Thu, 14 Dec 2017 19:27:11 +0000 (13:27 -0600)
committerIlya Tocar <ilya.tocar@intel.com>
Tue, 20 Mar 2018 21:02:39 +0000 (21:02 +0000)
Currently we don't lift spill out of loop if loop contains call.
However often we have code like this:

for .. {
    if hard_case {
call()
    }
    // simple case, without call
}

So instead of checking for any call, check for unavoidable call.
For #22698 cases I see:
mime/quotedprintable/Writer-6                   10.9µs ± 4%      9.2µs ± 3%   -15.02%  (p=0.000 n=8+8)
And:
compress/flate/Encode/Twain/Huffman/1e4-6       99.4µs ± 6%     90.9µs ± 0%    -8.57%  (p=0.000 n=8+8)
compress/flate/Encode/Twain/Huffman/1e5-6       760µs ± 1%      725µs ± 1%     -4.56%  (p=0.000 n=8+8)
compress/flate/Encode/Twain/Huffman/1e6-6       7.55ms ± 0%      7.24ms ± 0%     -4.07%  (p=0.000 n=8+7)

There are no significant changes on go1 benchmarks.
But for cases with runtime arch checks, where we call generic version on old hardware,
there are respectable performance gains:
math/RoundToEven-6                             1.43ns ± 0%     1.25ns ± 0%   -12.59%  (p=0.001 n=7+7)
math/bits/OnesCount64-6                        1.60ns ± 1%     1.42ns ± 1%   -11.32%  (p=0.000 n=8+8)

Also on some runtime benchmarks loops have less loads and higher performance:
runtime/RuneIterate/range1/ASCII-6             15.6ns ± 1%     13.9ns ± 1%   -10.74%  (p=0.000 n=7+8)
runtime/ArrayEqual-6                           3.22ns ± 0%     2.86ns ± 2%   -11.06%  (p=0.000 n=7+8)

Fixes #22698
Updates #22234

Change-Id: I0ae2f19787d07a9026f064366dedbe601bf7257a
Reviewed-on: https://go-review.googlesource.com/84055
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/ssa/likelyadjust.go
src/cmd/compile/internal/ssa/regalloc.go

index 5f4c5d1ccd4cf49276a609a2a8fa10a92246ebe7..012dd77868098d45a1244a9688b3ebf338c66122 100644 (file)
@@ -23,7 +23,7 @@ type loop struct {
        isInner bool  // True if never discovered to contain a loop
 
        // register allocation uses this.
-       containsCall bool // if any block in this loop or any loop within it contains has a call
+       containsUnavoidableCall bool // True if all paths through the loop have a call
 }
 
 // outerinner records that outer contains inner
@@ -47,28 +47,18 @@ func (sdom SparseTree) outerinner(outer, inner *loop) {
 
        inner.outer = outer
        outer.isInner = false
-       if inner.containsCall {
-               outer.setContainsCall()
-       }
 }
 
-func (l *loop) setContainsCall() {
-       for ; l != nil && !l.containsCall; l = l.outer {
-               l.containsCall = true
-       }
-
-}
-func (l *loop) checkContainsCall(bb *Block) {
+func checkContainsCall(bb *Block) bool {
        if bb.Kind == BlockDefer {
-               l.setContainsCall()
-               return
+               return true
        }
        for _, v := range bb.Values {
                if opcodeTable[v.Op].call {
-                       l.setContainsCall()
-                       return
+                       return true
                }
        }
+       return false
 }
 
 type loopnest struct {
@@ -323,7 +313,6 @@ func loopnestfor(f *Func) *loopnest {
                                        l = &loop{header: bb, isInner: true}
                                        loops = append(loops, l)
                                        b2l[bb.ID] = l
-                                       l.checkContainsCall(bb)
                                }
                        } else if !visited[bb.ID] { // Found an irreducible loop
                                sawIrred = true
@@ -371,7 +360,6 @@ func loopnestfor(f *Func) *loopnest {
 
                if innermost != nil {
                        b2l[b.ID] = innermost
-                       innermost.checkContainsCall(b)
                        innermost.nBlocks++
                }
                visited[b.ID] = true
@@ -379,6 +367,65 @@ func loopnestfor(f *Func) *loopnest {
 
        ln := &loopnest{f: f, b2l: b2l, po: po, sdom: sdom, loops: loops, hasIrreducible: sawIrred}
 
+       // Calculate containsUnavoidableCall for regalloc
+       dominatedByCall := make([]bool, f.NumBlocks())
+       for _, b := range po {
+               if checkContainsCall(b) {
+                       dominatedByCall[b.ID] = true
+               }
+       }
+       // Run dfs to find path through the loop that avoids all calls.
+       // Such path either escapes loop or return back to header.
+       // It isn't enough to have exit not dominated by any call, for example:
+       // ... some loop
+       // call1   call2
+       //   \      /
+       //     exit
+       // ...
+       // exit is not dominated by any call, but we don't have call-free path to it.
+       for _, l := range loops {
+               // Header contains call.
+               if dominatedByCall[l.header.ID] {
+                       l.containsUnavoidableCall = true
+                       continue
+               }
+               callfreepath := false
+               tovisit := make([]*Block, 0, len(l.header.Succs))
+               // Push all non-loop non-exit successors of header onto toVisit.
+               for _, s := range l.header.Succs {
+                       nb := s.Block()
+                       // This corresponds to loop with zero iterations.
+                       if !l.iterationEnd(nb, b2l) {
+                               tovisit = append(tovisit, nb)
+                       }
+               }
+               for len(tovisit) > 0 {
+                       cur := tovisit[len(tovisit)-1]
+                       tovisit = tovisit[:len(tovisit)-1]
+                       if dominatedByCall[cur.ID] {
+                               continue
+                       }
+                       // Record visited in dominatedByCall.
+                       dominatedByCall[cur.ID] = true
+                       for _, s := range cur.Succs {
+                               nb := s.Block()
+                               if l.iterationEnd(nb, b2l) {
+                                       callfreepath = true
+                               }
+                               if !dominatedByCall[nb.ID] {
+                                       tovisit = append(tovisit, nb)
+                               }
+
+                       }
+                       if callfreepath {
+                               break
+                       }
+               }
+               if !callfreepath {
+                       l.containsUnavoidableCall = true
+               }
+       }
+
        // Curious about the loopiness? "-d=ssa/likelyadjust/stats"
        if f.pass != nil && f.pass.stats > 0 && len(loops) > 0 {
                ln.assembleChildren()
@@ -391,7 +438,7 @@ func loopnestfor(f *Func) *loopnest {
                for _, l := range loops {
                        x := len(l.exits)
                        cf := 0
-                       if !l.containsCall {
+                       if !l.containsUnavoidableCall {
                                cf = 1
                        }
                        inner := 0
@@ -401,7 +448,7 @@ func loopnestfor(f *Func) *loopnest {
 
                        f.LogStat("loopstats:",
                                l.depth, "depth", x, "exits",
-                               inner, "is_inner", cf, "is_callfree", l.nBlocks, "n_blocks")
+                               inner, "is_inner", cf, "always_calls", l.nBlocks, "n_blocks")
                }
        }
 
@@ -519,3 +566,10 @@ func (l *loop) setDepth(d int16) {
                c.setDepth(d + 1)
        }
 }
+
+// iterationEnd checks if block b ends iteration of loop l.
+// Ending iteration means either escaping to outer loop/code or
+// going back to header
+func (l *loop) iterationEnd(b *Block, b2l []*loop) bool {
+       return b == l.header || b2l[b.ID] == nil || (b2l[b.ID] != l && b2l[b.ID].depth <= l.depth)
+}
index ea88da3b9c16dad598eab80810aa779f39fad3cd..09efc90e72a63c3d7b1c521f95f03e9d4e78a9af 100644 (file)
@@ -1415,7 +1415,7 @@ func (s *regAllocState) regalloc(f *Func) {
                        // For this to be worthwhile, the loop must have no calls in it.
                        top := b.Succs[0].b
                        loop := s.loopnest.b2l[top.ID]
-                       if loop == nil || loop.header != top || loop.containsCall {
+                       if loop == nil || loop.header != top || loop.containsUnavoidableCall {
                                goto badloop
                        }