]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: improve startRegs calculation
authorCherry Zhang <cherryyz@google.com>
Thu, 23 Mar 2017 01:34:12 +0000 (21:34 -0400)
committerCherry Zhang <cherryyz@google.com>
Wed, 29 Mar 2017 19:45:32 +0000 (19:45 +0000)
In register allocation, we calculate what values are used in
and after the current block. If a value is used only after a
function call, since registers are clobbered in call, we don't
need to mark the value live at the entrance of the block.
Before this CL it is considered live, and unnecessary copy or
load may be generated when resolving merge edge.

Fixes #14761.

On AMD64:
name                      old time/op    new time/op    delta
BinaryTree17-12              2.84s ± 1%     2.81s ± 1%   -1.06%  (p=0.000 n=10+9)
Fannkuch11-12                3.61s ± 0%     3.55s ± 1%   -1.77%  (p=0.000 n=10+9)
FmtFprintfEmpty-12          50.4ns ± 4%    50.0ns ± 1%     ~     (p=0.785 n=9+8)
FmtFprintfString-12         80.0ns ± 3%    78.2ns ± 3%   -2.35%  (p=0.004 n=10+9)
FmtFprintfInt-12            81.3ns ± 4%    81.8ns ± 2%     ~     (p=0.159 n=10+10)
FmtFprintfIntInt-12          120ns ± 4%     118ns ± 2%     ~     (p=0.218 n=10+10)
FmtFprintfPrefixedInt-12     152ns ± 3%     155ns ± 2%   +2.11%  (p=0.026 n=10+10)
FmtFprintfFloat-12           240ns ± 1%     238ns ± 1%   -0.79%  (p=0.005 n=9+9)
FmtManyArgs-12               504ns ± 1%     510ns ± 1%   +1.14%  (p=0.000 n=8+9)
GobDecode-12                7.00ms ± 1%    6.99ms ± 0%     ~     (p=0.497 n=9+10)
GobEncode-12                5.47ms ± 1%    5.48ms ± 1%     ~     (p=0.218 n=10+10)
Gzip-12                      258ms ± 2%     256ms ± 1%   -0.96%  (p=0.043 n=10+9)
Gunzip-12                   38.6ms ± 0%    38.3ms ± 0%   -0.64%  (p=0.000 n=9+8)
HTTPClientServer-12         90.4µs ± 3%    87.2µs ±11%     ~     (p=0.053 n=9+10)
JSONEncode-12               15.6ms ± 0%    15.6ms ± 1%     ~     (p=0.077 n=9+9)
JSONDecode-12               55.1ms ± 1%    54.6ms ± 1%   -0.85%  (p=0.010 n=10+9)
Mandelbrot200-12            4.49ms ± 0%    4.47ms ± 0%   -0.25%  (p=0.000 n=10+8)
GoParse-12                  3.38ms ± 0%    3.37ms ± 1%     ~     (p=0.315 n=8+10)
RegexpMatchEasy0_32-12      82.5ns ± 4%    82.0ns ± 0%     ~     (p=0.164 n=10+8)
RegexpMatchEasy0_1K-12       203ns ± 1%     202ns ± 1%   -0.85%  (p=0.000 n=9+10)
RegexpMatchEasy1_32-12      82.3ns ± 1%    81.1ns ± 0%   -1.39%  (p=0.000 n=10+8)
RegexpMatchEasy1_1K-12       357ns ± 1%     357ns ± 1%     ~     (p=0.697 n=8+9)
RegexpMatchMedium_32-12      125ns ± 2%     126ns ± 2%     ~     (p=0.197 n=10+10)
RegexpMatchMedium_1K-12     39.6µs ± 3%    39.6µs ± 1%     ~     (p=0.971 n=10+10)
RegexpMatchHard_32-12       1.99µs ± 2%    1.99µs ± 4%     ~     (p=0.891 n=10+9)
RegexpMatchHard_1K-12       60.1µs ± 3%    60.4µs ± 3%     ~     (p=0.684 n=10+10)
Revcomp-12                   531ms ± 6%     441ms ± 0%  -16.94%  (p=0.000 n=10+9)
Template-12                 58.9ms ± 1%    58.7ms ± 1%     ~     (p=0.315 n=10+10)
TimeParse-12                 319ns ± 1%     320ns ± 4%     ~     (p=0.215 n=9+9)
TimeFormat-12                345ns ± 0%     333ns ± 1%   -3.36%  (p=0.000 n=9+10)
[Geo mean]                  52.2µs         51.6µs        -1.13%

On ARM64:
name                     old time/op    new time/op    delta
BinaryTree17-8              8.53s ± 0%     8.36s ± 0%   -1.89%  (p=0.000 n=10+10)
Fannkuch11-8                6.15s ± 0%     6.10s ± 0%   -0.67%  (p=0.000 n=10+10)
FmtFprintfEmpty-8           117ns ± 0%     117ns ± 0%     ~     (all equal)
FmtFprintfString-8          192ns ± 0%     192ns ± 0%     ~     (all equal)
FmtFprintfInt-8             198ns ± 0%     198ns ± 0%     ~     (p=0.211 n=10+10)
FmtFprintfIntInt-8          289ns ± 0%     291ns ± 0%   +0.59%  (p=0.000 n=7+10)
FmtFprintfPrefixedInt-8     320ns ± 2%     317ns ± 0%     ~     (p=0.431 n=10+8)
FmtFprintfFloat-8           538ns ± 0%     538ns ± 0%     ~     (all equal)
FmtManyArgs-8              1.17µs ± 1%    1.18µs ± 1%     ~     (p=0.063 n=10+10)
GobDecode-8                17.0ms ± 1%    17.2ms ± 1%   +0.83%  (p=0.000 n=10+10)
GobEncode-8                14.2ms ± 0%    14.1ms ± 1%   -0.78%  (p=0.001 n=9+10)
Gzip-8                      806ms ± 0%     797ms ± 0%   -1.12%  (p=0.000 n=6+9)
Gunzip-8                    131ms ± 0%     130ms ± 0%   -0.51%  (p=0.000 n=10+9)
HTTPClientServer-8          206µs ± 9%     212µs ± 2%     ~     (p=0.829 n=10+8)
JSONEncode-8               40.1ms ± 0%    40.1ms ± 0%     ~     (p=0.136 n=9+9)
JSONDecode-8                157ms ± 0%     151ms ± 0%   -3.32%  (p=0.000 n=9+9)
Mandelbrot200-8            10.1ms ± 0%    10.1ms ± 0%   -0.05%  (p=0.000 n=9+8)
GoParse-8                  8.43ms ± 0%    8.43ms ± 0%     ~     (p=0.912 n=10+10)
RegexpMatchEasy0_32-8       228ns ± 1%     227ns ± 0%   -0.26%  (p=0.026 n=10+9)
RegexpMatchEasy0_1K-8      1.92µs ± 0%    1.63µs ± 0%  -15.18%  (p=0.001 n=7+7)
RegexpMatchEasy1_32-8       258ns ± 1%     250ns ± 0%   -2.83%  (p=0.000 n=10+10)
RegexpMatchEasy1_1K-8      2.39µs ± 0%    2.13µs ± 0%  -10.94%  (p=0.000 n=9+9)
RegexpMatchMedium_32-8      352ns ± 0%     351ns ± 0%   -0.29%  (p=0.004 n=9+10)
RegexpMatchMedium_1K-8      104µs ± 0%     105µs ± 0%   +0.58%  (p=0.000 n=8+9)
RegexpMatchHard_32-8       5.84µs ± 0%    5.82µs ± 0%   -0.27%  (p=0.000 n=9+10)
RegexpMatchHard_1K-8        177µs ± 0%     177µs ± 0%   -0.07%  (p=0.000 n=9+9)
Revcomp-8                   1.57s ± 1%     1.50s ± 1%   -4.60%  (p=0.000 n=9+10)
Template-8                  157ms ± 1%     153ms ± 1%   -2.28%  (p=0.000 n=10+9)
TimeParse-8                 779ns ± 1%     770ns ± 1%   -1.18%  (p=0.013 n=10+10)
TimeFormat-8                823ns ± 2%     826ns ± 1%     ~     (p=0.324 n=10+9)
[Geo mean]                  144µs          142µs        -1.45%

Reduce cmd/go text size by 0.5%.

Change-Id: I9288ff983c4a7cf03fc0cb35b9b1750828013117
Reviewed-on: https://go-review.googlesource.com/38457
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/ssa/regalloc.go

index f934edfcac36c76e33f6d33524d542149ae7baf8..7361e1392bae052f6ddf9b906274117bd0d34ed7 100644 (file)
@@ -241,6 +241,9 @@ type regAllocState struct {
        // current state of each (preregalloc) Value
        values []valState
 
+       // ID of SP, SB values
+       sp, sb ID
+
        // For each Value, map from its value ID back to the
        // preregalloc Value it was derived from.
        orig []*Value
@@ -709,8 +712,8 @@ func (s *regAllocState) compatRegs(t Type) regMask {
 }
 
 func (s *regAllocState) regalloc(f *Func) {
-       liveSet := f.newSparseSet(f.NumValues())
-       defer f.retSparseSet(liveSet)
+       regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register
+       defer f.retSparseSet(regValLiveSet)
        var oldSched []*Value
        var phis []*Value
        var phiRegs []register
@@ -733,32 +736,42 @@ func (s *regAllocState) regalloc(f *Func) {
        for _, b := range f.Blocks {
                s.curBlock = b
 
-               // Initialize liveSet and uses fields for this block.
+               // Initialize regValLiveSet and uses fields for this block.
                // Walk backwards through the block doing liveness analysis.
-               liveSet.clear()
+               regValLiveSet.clear()
                for _, e := range s.live[b.ID] {
                        s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block
-                       liveSet.add(e.ID)
+                       regValLiveSet.add(e.ID)
                }
                if v := b.Control; v != nil && s.values[v.ID].needReg {
                        s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control value
-                       liveSet.add(v.ID)
+                       regValLiveSet.add(v.ID)
                }
                for i := len(b.Values) - 1; i >= 0; i-- {
                        v := b.Values[i]
-                       liveSet.remove(v.ID)
+                       regValLiveSet.remove(v.ID)
                        if v.Op == OpPhi {
                                // Remove v from the live set, but don't add
                                // any inputs. This is the state the len(b.Preds)>1
                                // case below desires; it wants to process phis specially.
                                continue
                        }
+                       if opcodeTable[v.Op].call {
+                               // Function call clobbers all the registers but SP and SB.
+                               regValLiveSet.clear()
+                               if s.sp != 0 && s.values[s.sp].uses != nil {
+                                       regValLiveSet.add(s.sp)
+                               }
+                               if s.sb != 0 && s.values[s.sb].uses != nil {
+                                       regValLiveSet.add(s.sb)
+                               }
+                       }
                        for _, a := range v.Args {
                                if !s.values[a.ID].needReg {
                                        continue
                                }
                                s.addUse(a.ID, int32(i), v.Pos)
-                               liveSet.add(a.ID)
+                               regValLiveSet.add(a.ID)
                        }
                }
                if s.f.pass.debug > regDebug {
@@ -808,7 +821,7 @@ func (s *regAllocState) regalloc(f *Func) {
                        // live but only used by some other successor of p.
                        for r := register(0); r < s.numRegs; r++ {
                                v := s.regs[r].v
-                               if v != nil && !liveSet.contains(v.ID) {
+                               if v != nil && !regValLiveSet.contains(v.ID) {
                                        s.freeReg(r)
                                }
                        }
@@ -864,7 +877,7 @@ func (s *regAllocState) regalloc(f *Func) {
                                        continue
                                }
                                a := v.Args[idx]
-                               if !liveSet.contains(a.ID) {
+                               if !regValLiveSet.contains(a.ID) {
                                        // Input is dead beyond the phi, deallocate
                                        // anywhere else it might live.
                                        s.freeRegs(s.values[a.ID].regs)
@@ -932,6 +945,17 @@ func (s *regAllocState) regalloc(f *Func) {
                                s.assignReg(r, v, v)
                        }
 
+                       // Deallocate any values which are no longer live. Phis are excluded.
+                       for r := register(0); r < s.numRegs; r++ {
+                               if phiUsed>>r&1 != 0 {
+                                       continue
+                               }
+                               v := s.regs[r].v
+                               if v != nil && !regValLiveSet.contains(v.ID) {
+                                       s.freeReg(r)
+                               }
+                       }
+
                        // Save the starting state for use by merge edges.
                        var regList []startReg
                        for r := register(0); r < s.numRegs; r++ {
@@ -1034,12 +1058,14 @@ func (s *regAllocState) regalloc(f *Func) {
                                s.assignReg(s.SPReg, v, v)
                                b.Values = append(b.Values, v)
                                s.advanceUses(v)
+                               s.sp = v.ID
                                continue
                        }
                        if v.Op == OpSB {
                                s.assignReg(s.SBReg, v, v)
                                b.Values = append(b.Values, v)
                                s.advanceUses(v)
+                               s.sb = v.ID
                                continue
                        }
                        if v.Op == OpSelect0 || v.Op == OpSelect1 {
@@ -1435,16 +1461,16 @@ func (s *regAllocState) regalloc(f *Func) {
                s.endRegs[b.ID] = regList
 
                if checkEnabled {
-                       liveSet.clear()
+                       regValLiveSet.clear()
                        for _, x := range s.live[b.ID] {
-                               liveSet.add(x.ID)
+                               regValLiveSet.add(x.ID)
                        }
                        for r := register(0); r < s.numRegs; r++ {
                                v := s.regs[r].v
                                if v == nil {
                                        continue
                                }
-                               if !liveSet.contains(v.ID) {
+                               if !regValLiveSet.contains(v.ID) {
                                        s.f.Fatalf("val %s is in reg but not live at end of %s", v, b)
                                }
                        }