]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: load some live values into registers before loop
authorKeith Randall <khr@golang.org>
Wed, 2 Mar 2016 23:18:40 +0000 (15:18 -0800)
committerKeith Randall <khr@golang.org>
Thu, 3 Mar 2016 17:39:43 +0000 (17:39 +0000)
If we're about to enter a loop, load values which are live
and will soon be used in the loop into registers.

name                     old time/op    new time/op    delta
BinaryTree17-8              2.80s ± 4%     2.62s ± 2%   -6.43%          (p=0.008 n=5+5)
Fannkuch11-8                2.45s ± 2%     2.14s ± 1%  -12.43%          (p=0.008 n=5+5)
FmtFprintfEmpty-8          49.0ns ± 1%    48.4ns ± 1%   -1.35%          (p=0.032 n=5+5)
FmtFprintfString-8          160ns ± 1%     153ns ± 0%   -4.63%          (p=0.008 n=5+5)
FmtFprintfInt-8             152ns ± 0%     150ns ± 0%   -1.57%          (p=0.000 n=5+4)
FmtFprintfIntInt-8          252ns ± 2%     244ns ± 1%   -3.02%          (p=0.008 n=5+5)
FmtFprintfPrefixedInt-8     223ns ± 0%     223ns ± 0%     ~     (all samples are equal)
FmtFprintfFloat-8           293ns ± 2%     291ns ± 2%     ~             (p=0.389 n=5+5)
FmtManyArgs-8               956ns ± 0%     936ns ± 0%   -2.05%          (p=0.008 n=5+5)
GobDecode-8                7.18ms ± 0%    7.11ms ± 0%   -1.02%          (p=0.008 n=5+5)
GobEncode-8                6.12ms ± 3%    6.07ms ± 1%     ~             (p=0.690 n=5+5)
Gzip-8                      284ms ± 1%     284ms ± 0%     ~             (p=1.000 n=5+5)
Gunzip-8                   40.8ms ± 1%    40.6ms ± 1%     ~             (p=0.310 n=5+5)
HTTPClientServer-8         69.8µs ± 1%    72.2µs ± 4%     ~             (p=0.056 n=5+5)
JSONEncode-8               16.1ms ± 2%    16.2ms ± 1%     ~             (p=0.151 n=5+5)
JSONDecode-8               54.9ms ± 0%    57.0ms ± 1%   +3.79%          (p=0.008 n=5+5)
Mandelbrot200-8            4.35ms ± 0%    4.39ms ± 0%   +0.85%          (p=0.008 n=5+5)
GoParse-8                  3.56ms ± 1%    3.42ms ± 1%   -4.03%          (p=0.008 n=5+5)
RegexpMatchEasy0_32-8      75.6ns ± 1%    75.0ns ± 0%   -0.83%          (p=0.016 n=5+4)
RegexpMatchEasy0_1K-8       250ns ± 0%     252ns ± 1%   +0.80%          (p=0.016 n=4+5)
RegexpMatchEasy1_32-8      75.0ns ± 0%    75.4ns ± 2%     ~             (p=0.206 n=5+5)
RegexpMatchEasy1_1K-8       401ns ± 0%     398ns ± 1%     ~             (p=0.056 n=5+5)
RegexpMatchMedium_32-8      119ns ± 0%     118ns ± 0%   -0.84%          (p=0.008 n=5+5)
RegexpMatchMedium_1K-8     36.6µs ± 0%    36.9µs ± 0%   +0.91%          (p=0.008 n=5+5)
RegexpMatchHard_32-8       1.95µs ± 1%    1.92µs ± 0%   -1.23%          (p=0.032 n=5+5)
RegexpMatchHard_1K-8       58.3µs ± 1%    58.1µs ± 1%     ~             (p=0.548 n=5+5)
Revcomp-8                   425ms ± 1%     389ms ± 1%   -8.39%          (p=0.008 n=5+5)
Template-8                 65.5ms ± 1%    63.6ms ± 1%   -2.86%          (p=0.008 n=5+5)
TimeParse-8                 363ns ± 0%     354ns ± 1%   -2.59%          (p=0.008 n=5+5)
TimeFormat-8                363ns ± 0%     364ns ± 1%     ~             (p=0.159 n=5+5)

Fixes #14511

Change-Id: I1b79d2545271fa90d5b04712cc25573bdc94f2ce
Reviewed-on: https://go-review.googlesource.com/20151
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ssa/regalloc.go
src/cmd/compile/internal/ssa/type.go

index e5d72ced4fdc3c467cae22b1d670cb4ace10da17..f83b88d79e0813fb09a4797f2a03ddc9df2a9a39 100644 (file)
@@ -5081,8 +5081,10 @@ func moveByType(t ssa.Type) int {
                        return x86.AMOVL
                case 8:
                        return x86.AMOVQ
+               case 16:
+                       return x86.AMOVUPS // int128s are in SSE registers
                default:
-                       panic("bad int register width")
+                       panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
                }
        }
        panic("bad register type")
index f9680e42029cd3e3456d9c59379ae2e387d41c14..39c69cfeeda55deba79040adeeb200568a5e845e 100644 (file)
@@ -102,6 +102,14 @@ import (
 const regDebug = false // TODO: compiler flag
 const logSpills = false
 
+// distance is a measure of how far into the future values are used.
+// distance is measured in units of instructions.
+const (
+       likelyDistance   = 1
+       normalDistance   = 10
+       unlikelyDistance = 100
+)
+
 // regalloc performs register allocation on f. It sets f.RegAlloc
 // to the resulting allocation.
 func regalloc(f *Func) {
@@ -550,7 +558,7 @@ func (s *regAllocState) setState(regs []endReg) {
 // compatRegs returns the set of registers which can store a type t.
 func (s *regAllocState) compatRegs(t Type) regMask {
        var m regMask
-       if t.IsFloat() {
+       if t.IsFloat() || t == TypeInt128 {
                m = 0xffff << 16 // X0-X15
        } else {
                m = 0xffef << 0 // AX-R15, except SP
@@ -576,8 +584,12 @@ func (s *regAllocState) regalloc(f *Func) {
                // Initialize liveSet and uses fields for this block.
                // Walk backwards through the block doing liveness analysis.
                liveSet.clear()
+               d := int32(len(b.Values))
+               if b.Kind == BlockCall {
+                       d += unlikelyDistance
+               }
                for _, e := range s.live[b.ID] {
-                       s.addUse(e.ID, int32(len(b.Values))+e.dist) // pseudo-uses from beyond end of block
+                       s.addUse(e.ID, d+e.dist) // pseudo-uses from beyond end of block
                        liveSet.add(e.ID)
                }
                if v := b.Control; v != nil && s.values[v.ID].needReg {
@@ -944,11 +956,11 @@ func (s *regAllocState) regalloc(f *Func) {
                        }
                }
 
+               // Load control value into reg.
                if v := b.Control; v != nil && s.values[v.ID].needReg {
                        if regDebug {
                                fmt.Printf("  processing control %s\n", v.LongString())
                        }
-                       // Load control value into reg.
                        // TODO: regspec for block control values, instead of using
                        // register set from the control op's output.
                        s.allocValToReg(v, opcodeTable[v.Op].reg.outputs[0], false, b.Line)
@@ -963,6 +975,53 @@ func (s *regAllocState) regalloc(f *Func) {
                        s.freeUseRecords = u
                }
 
+               // If we are approaching a merge point and we are the primary
+               // predecessor of it, find live values that we use soon after
+               // the merge point and promote them to registers now.
+               if len(b.Succs) == 1 && len(b.Succs[0].Preds) > 1 && b.Succs[0].Preds[s.primary[b.Succs[0].ID]] == b {
+                       // For this to be worthwhile, the loop must have no calls in it.
+                       // Use a very simple loop detector. TODO: incorporate David's loop stuff
+                       // once it is in.
+                       top := b.Succs[0]
+                       for _, p := range top.Preds {
+                               if p == b {
+                                       continue
+                               }
+                               for {
+                                       if p.Kind == BlockCall {
+                                               goto badloop
+                                       }
+                                       if p == top {
+                                               break
+                                       }
+                                       if len(p.Preds) != 1 {
+                                               goto badloop
+                                       }
+                                       p = p.Preds[0]
+                               }
+                       }
+
+                       // TODO: sort by distance, pick the closest ones?
+                       for _, live := range s.live[b.ID] {
+                               if live.dist >= unlikelyDistance {
+                                       // Don't preload anything live after the loop.
+                                       continue
+                               }
+                               vid := live.ID
+                               vi := &s.values[vid]
+                               if vi.regs != 0 {
+                                       continue
+                               }
+                               v := s.orig[vid]
+                               m := s.compatRegs(v.Type) &^ s.used
+                               if m != 0 {
+                                       s.allocValToReg(v, m, false, b.Line)
+                               }
+                       }
+               }
+       badloop:
+               ;
+
                // Save end-of-block register state.
                // First count how many, this cuts allocations in half.
                k := 0
@@ -1539,8 +1598,14 @@ func (s *regAllocState) computeLive() {
                        // Add len(b.Values) to adjust from end-of-block distance
                        // to beginning-of-block distance.
                        live.clear()
+                       d := int32(len(b.Values))
+                       if b.Kind == BlockCall {
+                               // Because we keep no values in registers across a call,
+                               // make every use past a call very far away.
+                               d += unlikelyDistance
+                       }
                        for _, e := range s.live[b.ID] {
-                               live.set(e.ID, e.dist+int32(len(b.Values)))
+                               live.set(e.ID, e.dist+d)
                        }
 
                        // Mark control value as live
@@ -1570,20 +1635,17 @@ func (s *regAllocState) computeLive() {
                        // invariant: live contains the values live at the start of b (excluding phi inputs)
                        for i, p := range b.Preds {
                                // Compute additional distance for the edge.
-                               const normalEdge = 10
-                               const likelyEdge = 1
-                               const unlikelyEdge = 100
                                // Note: delta must be at least 1 to distinguish the control
                                // value use from the first user in a successor block.
-                               delta := int32(normalEdge)
+                               delta := int32(normalDistance)
                                if len(p.Succs) == 2 {
                                        if p.Succs[0] == b && p.Likely == BranchLikely ||
                                                p.Succs[1] == b && p.Likely == BranchUnlikely {
-                                               delta = likelyEdge
+                                               delta = likelyDistance
                                        }
                                        if p.Succs[0] == b && p.Likely == BranchUnlikely ||
                                                p.Succs[1] == b && p.Likely == BranchLikely {
-                                               delta = unlikelyEdge
+                                               delta = unlikelyDistance
                                        }
                                }
 
index a23989c82e25c01e4c7ec8c7be32440eff91ba29..c0174cce4fd916a41b6e648e1a5c5095e1b334b1 100644 (file)
@@ -46,13 +46,14 @@ type Type interface {
 // Special compiler-only types.
 type CompilerType struct {
        Name   string
+       size   int64
        Memory bool
        Flags  bool
        Void   bool
        Int128 bool
 }
 
-func (t *CompilerType) Size() int64            { return 0 } // Size in bytes
+func (t *CompilerType) Size() int64            { return t.size } // Size in bytes
 func (t *CompilerType) Alignment() int64       { return 0 }
 func (t *CompilerType) IsBoolean() bool        { return false }
 func (t *CompilerType) IsInteger() bool        { return false }
@@ -127,5 +128,5 @@ var (
        TypeMem     = &CompilerType{Name: "mem", Memory: true}
        TypeFlags   = &CompilerType{Name: "flags", Flags: true}
        TypeVoid    = &CompilerType{Name: "void", Void: true}
-       TypeInt128  = &CompilerType{Name: "int128", Int128: true}
+       TypeInt128  = &CompilerType{Name: "int128", size: 16, Int128: true}
 )