// Spilling
//
-// For every value, we generate a spill immediately after the value itself.
-// x = Op y z : AX
-// x2 = StoreReg x
-// While AX still holds x, any uses of x will use that value. When AX is needed
-// for another value, we simply reuse AX. Spill code has already been generated
-// so there is no code generated at "spill" time. When x is referenced
-// subsequently, we issue a load to restore x to a register using x2 as
-// its argument:
-// x3 = Restore x2 : CX
-// x3 can then be used wherever x is referenced again.
-// If the spill (x2) is never used, it will be removed at the end of regalloc.
+// During the normal course of the allocator, we might throw a still-live
+// value out of all registers. When that value is subsequently used, we must
+// load it from a slot on the stack. We must also issue an instruction to
+// initialize that stack location with a copy of the value, which we'll call v.
+//
+// pre-regalloc:
+//   (1) v = Op ...
+//   (2) x = Op ...
+//   (3) ... = Op v ...
+//
+// post-regalloc:
+//   (1) v = Op ...    : AX // computes v, store result in AX
+//       s = StoreReg v     // spill v to a stack slot
+//   (2) x = Op ...    : AX // some other op uses AX
+//       c = LoadReg s : CX // restore v from stack slot
+//   (3) ... = Op c ...     // use the restored value
+//
+// Allocation occurs normally until we reach (3) and we realize we have
+// a use of v and it isn't in any register. At that point, we allocate
+// a spill (a StoreReg) for v. We can't determine the correct place for
+// the spill at this point, so we allocate the spill as blockless initially.
+// The restore is then generated to load v back into a register so it can
+// be used. Subsequent uses of v will use the restored value c instead.
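+//
+// In code terms (a condensed sketch of what allocValToReg below does when it
+// needs to restore a value):
+//
+//	spill := s.makeSpill(v, s.curBlock)                      // blockless StoreReg for v
+//	c := s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill) // the restore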
+//
+// What remains is the question of where to schedule the spill.
+// During allocation, we keep track of the dominator of all restores of v.
+// The spill of v must dominate that block. The spill must also be issued at
+// a point where v is still in a register.
+//
+// To find the right place, start at b, the block which dominates all restores.
+//  - If b is v.Block, then issue the spill right after v.
+//    It is known to be in a register at that point, and dominates any restores.
+//  - Otherwise, if v is in a register at the start of b,
+//    put the spill of v at the start of b.
+//  - Otherwise, set b = immediate dominator of b, and repeat.
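+//
+// For example, consider applying the walk above to a value whose only
+// restore sits in an unlikely branch (an illustrative sketch; the block
+// layout is assumed, not taken from a real function):
+//
+//   v = ...
+//   if unlikely {
+//       f()        // call clobbers registers, forcing a restore of v
+//       ... = v
+//   }
+//
+// The branch body dominates the restore, v is still in a register at its
+// start, and it is no deeper in the loop nest, so the spill lands at the
+// start of the branch body. Both the spill and the restore stay off the
+// common path.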
//
// Phi values are special, as always. We define two kinds of phis, those
// where the merge happens in a register (a "register" phi) and those where
// the merge happens in a stack location (a "stack" phi).
//
// A register phi must have the phi and all of its inputs allocated to the
-// same register. Register phis are spilled similarly to regular ops:
-//     b1: y = ... : AX        b2: z = ... : AX
-//         goto b3                 goto b3
-//     b3: x = phi(y, z) : AX
-//         x2 = StoreReg x
+// same register. Register phis are spilled similarly to regular ops.
//
// A stack phi must have the phi and all of its inputs allocated to the same
// stack location. Stack phis start out life already spilled - each phi
// will have no use (so don't run deadcode after regalloc!).
// TODO: maybe we should introduce these extra phis?
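+//
+// For illustration (a sketch only; the value names and the exact placement of
+// the StoreRegs are assumptions, not taken from this change), a stack phi
+// looks like:
+//
+//   b1: y = ... : AX        b2: z = ... : BX
+//       y2 = StoreReg y         z2 = StoreReg z
+//       goto b3                 goto b3
+//   b3: x = phi(y2, z2)
+//
+// x, y2, and z2 must all be assigned the same stack slot, so the merged value
+// reaches b3 through that slot rather than through a register.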
-// Additional not-quite-SSA output occurs when spills are sunk out
-// of loops to the targets of exit edges from the loop. Before sinking,
-// there is one spill site (one StoreReg) targeting stack slot X, after
-// sinking there may be multiple spill sites targeting stack slot X,
-// with no phi functions at any join points reachable by the multiple
-// spill sites. In addition, uses of the spill from copies of the original
-// will not name the copy in their reference; instead they will name
-// the original, though both will have the same spill location. The
-// first sunk spill will be the original, but moved, to an exit block,
-// thus ensuring that there is a definition somewhere corresponding to
-// the original spill's uses.
-
package ssa
import (
next *use // linked list of uses of a value in nondecreasing dist order
}
+// A valState records the register allocation state for a (pre-regalloc) value.
type valState struct {
regs regMask // the set of registers holding a Value (usually just one)
uses *use // list of uses in this block
- spill *Value // spilled copy of the Value
- spillUsed bool
- spillUsedShuffle bool // true if used in shuffling, after ordinary uses
- needReg bool // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !.v.Type.IsFlags()
- rematerializeable bool // cached value of v.rematerializeable()
+ spill *Value // spilled copy of the Value (if any)
+ restoreMin int32 // minimum of all restores' blocks' sdom.entry
+ restoreMax int32 // maximum of all restores' blocks' sdom.exit
+ needReg bool // cached value of !v.Type.IsMemory() && !v.Type.IsVoid() && !v.Type.IsFlags()
+ rematerializeable bool // cached value of v.rematerializeable()
}
type regState struct {
type regAllocState struct {
f *Func
+ sdom SparseTree
registers []Register
numRegs register
SPReg register
loopnest *loopnest
}
-type spillToSink struct {
- spill *Value // Spill instruction to move (a StoreReg)
- dests int32 // Bitmask indicating exit blocks from loop in which spill/val is defined. 1<<i set means val is live into loop.exitBlocks[i]
-}
-
-func (sts *spillToSink) spilledValue() *Value {
- return sts.spill.Args[0]
-}
-
type endReg struct {
r register
v *Value // pre-regalloc value held in this register (TODO: can we use ID here?)
type startReg struct {
r register
- vid ID // pre-regalloc value needed in this register
+ v *Value // pre-regalloc value needed in this register
+ c *Value // cached version of the value
pos src.XPos // source position of use of this register
}
return r
}
+// makeSpill returns a Value which represents the spilled value of v.
+// b is the block in which the spill is used.
+func (s *regAllocState) makeSpill(v *Value, b *Block) *Value {
+	vi := &s.values[v.ID]
+	if vi.spill != nil {
+		// Final block not known - keep track of subtree where restores reside.
+		vi.restoreMin = min32(vi.restoreMin, s.sdom[b.ID].entry)
+		vi.restoreMax = max32(vi.restoreMax, s.sdom[b.ID].exit)
+		return vi.spill
+	}
+	// Make a spill for v. We don't know where we want
+	// to put it yet, so we leave it blockless for now.
+	spill := s.f.newValueNoBlock(OpStoreReg, v.Type, v.Pos)
+	// We also don't know what the spill's arg will be.
+	// Leave it argless for now.
+	s.setOrig(spill, v)
+	vi.spill = spill
+	vi.restoreMin = s.sdom[b.ID].entry
+	vi.restoreMax = s.sdom[b.ID].exit
+	return spill
+}
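+
+// The restoreMin/restoreMax interval recorded above lets placeSpills check in
+// O(1) whether a candidate block dominates every restore seen so far: a block
+// dominates another iff its dominator-tree DFS interval contains the other's.
+// A sketch of that containment test (the helper name is illustrative only):
+//
+//	func dominatesAllRestores(s *regAllocState, c *Block, vi *valState) bool {
+//		return s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax
+//	}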
+
// allocValToReg allocates v to a register selected from regMask and
// returns the register copy of v. Any previous user is kicked out and spilled
// (if necessary). Load code is added at the current pc. If nospill is set the
// Rematerialize instead of loading from the spill location.
c = v.copyInto(s.curBlock)
} else {
- switch {
// Load v from its spill location.
- case vi.spill != nil:
- if s.f.pass.debug > logSpills {
- s.f.Config.Warnl(vi.spill.Pos, "load spill for %v from %v", v, vi.spill)
- }
- c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, vi.spill)
- vi.spillUsed = true
- default:
- s.f.Fatalf("attempt to load unspilled value %v", v.LongString())
+ spill := s.makeSpill(v, s.curBlock)
+ if s.f.pass.debug > logSpills {
+ s.f.Config.Warnl(vi.spill.Pos, "load spill for %v from %v", v, spill)
}
+ c = s.curBlock.NewValue1(pos, OpLoadReg, v.Type, spill)
}
s.setOrig(c, v)
s.assignReg(r, v, c)
s.endRegs = make([][]endReg, f.NumBlocks())
s.startRegs = make([][]startReg, f.NumBlocks())
s.spillLive = make([][]ID, f.NumBlocks())
+ s.sdom = f.sdom()
}
// Adds a use record for id at distance dist from the start of the block.
return m & s.allocatable
}
-// loopForBlock returns the loop containing block b,
-// provided that the loop is "interesting" for purposes
-// of improving register allocation (= is inner, and does
-// not contain a call)
-func (s *regAllocState) loopForBlock(b *Block) *loop {
- loop := s.loopnest.b2l[b.ID]
-
- // Minor for-the-time-being optimization: nothing happens
- // unless a loop is both inner and call-free, therefore
- // don't bother with other loops.
- if loop != nil && (loop.containsCall || !loop.isInner) {
- loop = nil
- }
- return loop
-}
-
func (s *regAllocState) regalloc(f *Func) {
liveSet := f.newSparseSet(f.NumValues())
defer f.retSparseSet(liveSet)
var phiRegs []register
var args []*Value
- // statistics
- var nSpills int // # of spills remaining
- var nSpillsInner int // # of spills remaining in inner loops
- var nSpillsSunk int // # of sunk spills remaining
- var nSpillsChanged int // # of sunk spills lost because of register use change
- var nSpillsSunkUnused int // # of spills not sunk because they were removed completely
- var nSpillsNotSunkLateUse int // # of spills not sunk because of very late use (in shuffle)
-
// Data structure used for computing desired registers.
var desired desiredState
f.Fatalf("entry block must be first")
}
- // Get loop nest so that spills in inner loops can be
- // tracked. When the last block of a loop is processed,
- // attempt to move spills out of the loop.
- s.loopnest.findExits()
-
- // Spills are moved from one block's slice of values to another's.
- // This confuses register allocation if it occurs before it is
- // complete, so candidates are recorded, then rechecked and
- // moved after all allocation (register and stack) is complete.
- // Because movement is only within a stack slot's lifetime, it
- // is safe to do this.
- var toSink []spillToSink
- // Will be used to figure out live inputs to exit blocks of inner loops.
- entryCandidates := newSparseMap(f.NumValues())
-
for _, b := range f.Blocks {
s.curBlock = b
- loop := s.loopForBlock(b)
// Initialize liveSet and uses fields for this block.
// Walk backwards through the block doing liveness analysis.
if r == noRegister {
// stack-based phi
// Spills will be inserted in all the predecessors below.
- s.values[v.ID].spill = v // v starts life spilled
- s.values[v.ID].spillUsed = true // use is guaranteed
+ s.values[v.ID].spill = v // v starts life spilled
continue
}
// register-based phi
s.assignReg(r, v, v)
- // Spill the phi in case we need to restore it later.
- spill := b.NewValue1(v.Pos, OpStoreReg, v.Type, v)
- s.setOrig(spill, v)
- s.values[v.ID].spill = spill
- s.values[v.ID].spillUsed = false
- if loop != nil {
- loop.spills = append(loop.spills, v)
- nSpillsInner++
- }
- nSpills++
}
// Save the starting state for use by merge edges.
// specially during merge edge processing.
continue
}
- regList = append(regList, startReg{r, v.ID, s.values[v.ID].uses.pos})
+ regList = append(regList, startReg{r, v, s.regs[r].c, s.values[v.ID].uses.pos})
}
s.startRegs[b.ID] = regList
if s.f.pass.debug > regDebug {
fmt.Printf("after phis\n")
for _, x := range s.startRegs[b.ID] {
- fmt.Printf(" %s: v%d\n", s.registers[x.r].Name(), x.vid)
+ fmt.Printf(" %s: v%d\n", s.registers[x.r].Name(), x.v.ID)
}
}
}
succ := e.b
// TODO: prioritize likely successor?
for _, x := range s.startRegs[succ.ID] {
- desired.add(x.vid, x.r)
+ desired.add(x.v.ID, x.r)
}
// Process phi ops in succ.
pidx := e.i
// any register here. We just set up the spill pointer to
// point at itself and any later user will restore it to use it.
s.values[v.ID].spill = v
- s.values[v.ID].spillUsed = true // use is guaranteed
b.Values = append(b.Values, v)
s.advanceUses(v)
continue
// Make sure the argument to v is still live here.
s.advanceUses(v)
vi := &s.values[v.Args[0].ID]
- if vi.spillUsed {
+ if vi.spill != nil {
// Use the spill location.
v.SetArg(0, vi.spill)
} else {
}
b.Values = append(b.Values, v)
- // Issue a spill for this value. We issue spills unconditionally,
- // then at the end of regalloc delete the ones we never use.
- // TODO: schedule the spill at a point that dominates all restores.
- // The restore may be off in an unlikely branch somewhere and it
- // would be better to have the spill in that unlikely branch as well.
- // v := ...
- // if unlikely {
- // f()
- // }
- // It would be good to have both spill and restore inside the IF.
issueSpill:
- if s.values[v.ID].needReg {
- spill := b.NewValue1(v.Pos, OpStoreReg, v.Type, v)
- s.setOrig(spill, v)
- s.values[v.ID].spill = spill
- s.values[v.ID].spillUsed = false
- if loop != nil {
- loop.spills = append(loop.spills, v)
- nSpillsInner++
- }
- nSpills++
- }
}
// Load control value into reg.
}
// If a value is live at the end of the block and
- // isn't in a register, remember that its spill location
- // is live. We need to remember this information so that
+ // isn't in a register, generate a use for the spill location.
+ // We need to remember this information so that
// the liveness analysis in stackalloc is correct.
for _, e := range s.live[b.ID] {
- if s.values[e.ID].regs != 0 {
+ vi := &s.values[e.ID]
+ if vi.regs != 0 {
// in a register, we'll use that source for the merge.
continue
}
- spill := s.values[e.ID].spill
- if spill == nil {
- // rematerializeable values will have spill==nil.
+ if vi.rematerializeable {
+ // we'll rematerialize during the merge.
continue
}
+ //fmt.Printf("live-at-end spill for %s at %s\n", s.orig[e.ID], b)
+ spill := s.makeSpill(s.orig[e.ID], b)
s.spillLive[b.ID] = append(s.spillLive[b.ID], spill.ID)
- s.values[e.ID].spillUsed = true
- }
-
- // Keep track of values that are spilled in the loop, but whose spill
- // is not used in the loop. It may be possible to move ("sink") the
- // spill out of the loop into one or more exit blocks.
- if loop != nil {
- loop.scratch++ // increment count of blocks in this loop that have been processed
- if loop.scratch == loop.nBlocks { // just processed last block of loop, if it is an inner loop.
- // This check is redundant with code at the top of the loop.
- // This is definitive; the one at the top of the loop is an optimization.
- if loop.isInner && // Common case, easier, most likely to be profitable
- !loop.containsCall && // Calls force spills, also lead to puzzling spill info.
- len(loop.exits) <= 32 { // Almost no inner loops have more than 32 exits,
- // and this allows use of a bitvector and a sparseMap.
-
- // TODO: exit calculation is messed up for non-inner loops
- // because of multilevel exits that are not part of the "exit"
- // count.
-
- // Compute the set of spill-movement candidates live at entry to exit blocks.
- // isLoopSpillCandidate filters for
- // (1) defined in appropriate loop
- // (2) needs a register
- // (3) spill not already used (in the loop)
- // Condition (3) === "in a register at all loop exits"
-
- entryCandidates.clear()
-
- for whichExit, ss := range loop.exits {
- // Start with live at end.
- for _, li := range s.live[ss.ID] {
- if s.isLoopSpillCandidate(loop, s.orig[li.ID]) {
- // s.live contains original IDs, use s.orig above to map back to *Value
- entryCandidates.setBit(li.ID, uint(whichExit))
- }
- }
- // Control can also be live.
- if ss.Control != nil && s.orig[ss.Control.ID] != nil && s.isLoopSpillCandidate(loop, s.orig[ss.Control.ID]) {
- entryCandidates.setBit(s.orig[ss.Control.ID].ID, uint(whichExit))
- }
- // Walk backwards, filling in locally live values, removing those defined.
- for i := len(ss.Values) - 1; i >= 0; i-- {
- v := ss.Values[i]
- vorig := s.orig[v.ID]
- if vorig != nil {
- entryCandidates.remove(vorig.ID) // Cannot be an issue, only keeps the sets smaller.
- }
- for _, a := range v.Args {
- aorig := s.orig[a.ID]
- if aorig != nil && s.isLoopSpillCandidate(loop, aorig) {
- entryCandidates.setBit(aorig.ID, uint(whichExit))
- }
- }
- }
- }
-
- for _, e := range loop.spills {
- whichblocks := entryCandidates.get(e.ID)
- oldSpill := s.values[e.ID].spill
- if whichblocks != 0 && whichblocks != -1 { // -1 = not in map.
- toSink = append(toSink, spillToSink{spill: oldSpill, dests: whichblocks})
- }
- }
-
- } // loop is inner etc
- loop.scratch = 0 // Don't leave a mess, just in case.
- loop.spills = nil
- } // if scratch == nBlocks
- } // if loop is not nil
+ }
// Clear any final uses.
// All that is left should be the pseudo-uses added for values which
}
}
- // Erase any spills we never used
- for i := range s.values {
- vi := s.values[i]
- if vi.spillUsed {
- if s.f.pass.debug > logSpills && vi.spill.Op != OpArg {
- s.f.Config.Warnl(vi.spill.Pos, "spilled value at %v remains", vi.spill)
- }
- continue
- }
- spill := vi.spill
- if spill == nil {
- // Constants, SP, SB, ...
- continue
- }
- loop := s.loopForBlock(spill.Block)
- if loop != nil {
- nSpillsInner--
- }
-
- spill.Args[0].Uses--
- f.freeValue(spill)
- nSpills--
- }
-
- for _, b := range f.Blocks {
- i := 0
- for _, v := range b.Values {
- if v.Op == OpInvalid {
- continue
- }
- b.Values[i] = v
- i++
- }
- b.Values = b.Values[:i]
- // TODO: zero b.Values[i:], recycle Values
- // Not important now because this is the last phase that manipulates Values
- }
-
- // Must clear these out before any potential recycling, though that's
- // not currently implemented.
- for i, ts := range toSink {
- vsp := ts.spill
- if vsp.Op == OpInvalid { // This spill was completely eliminated
- toSink[i].spill = nil
- }
- }
+ // Decide where the spills we generated will go.
+ s.placeSpills()
// Anything that didn't get a register gets a stack location here.
// (StoreReg, stack-based phis, inputs, ...)
// Fix up all merge edges.
s.shuffle(stacklive)
- // Insert moved spills (that have not been marked invalid above)
- // at start of appropriate block and remove the originals from their
- // location within loops. Notice that this can break SSA form;
- // if a spill is sunk to multiple exits, there will be no phi for that
- // spill at a join point downstream of those two exits, though the
- // two spills will target the same stack slot. Notice also that this
- // takes place after stack allocation, so the stack allocator does
- // not need to process these malformed flow graphs.
-sinking:
- for _, ts := range toSink {
- vsp := ts.spill
- if vsp == nil { // This spill was completely eliminated
- nSpillsSunkUnused++
- continue sinking
- }
- e := ts.spilledValue()
- if s.values[e.ID].spillUsedShuffle {
- nSpillsNotSunkLateUse++
- continue sinking
- }
-
- // move spills to a better (outside of loop) block.
- // This would be costly if it occurred very often, but it doesn't.
- b := vsp.Block
- loop := s.loopnest.b2l[b.ID]
- dests := ts.dests
-
- // Pre-check to be sure that spilled value is still in expected register on all exits where live.
- check_val_still_in_reg:
- for i := uint(0); i < 32 && dests != 0; i++ {
-
- if dests&(1<<i) == 0 {
- continue
- }
- dests ^= 1 << i
- d := loop.exits[i]
- if len(d.Preds) > 1 {
- panic("Should be impossible given critical edges removed")
- }
- p := d.Preds[0].b // block in loop exiting to d.
-
- endregs := s.endRegs[p.ID]
- for _, regrec := range endregs {
- if regrec.v == e && regrec.r != noRegister && regrec.c == e { // TODO: regrec.c != e implies different spill possible.
- continue check_val_still_in_reg
- }
- }
- // If here, the register assignment was lost down at least one exit and it can't be sunk
- if s.f.pass.debug > moveSpills {
- s.f.Config.Warnl(e.Pos, "lost register assignment for spill %v in %v at exit %v to %v",
- vsp, b, p, d)
- }
- nSpillsChanged++
- continue sinking
- }
-
- nSpillsSunk++
- nSpillsInner--
- // don't update nSpills, since spill is only moved, and if it is duplicated, the spills-on-a-path is not increased.
-
- dests = ts.dests
-
- // remove vsp from b.Values
- i := 0
- for _, w := range b.Values {
- if vsp == w {
- continue
- }
- b.Values[i] = w
- i++
- }
- b.Values = b.Values[:i]
-
- first := true
- for i := uint(0); i < 32 && dests != 0; i++ {
-
- if dests&(1<<i) == 0 {
- continue
- }
-
- dests ^= 1 << i
-
- d := loop.exits[i]
- vspnew := vsp // reuse original for first sunk spill, saves tracking down and renaming uses
- if !first { // any sunk spills after first must make a copy
- vspnew = d.NewValue1(e.Pos, OpStoreReg, e.Type, e)
- f.setHome(vspnew, f.getHome(vsp.ID)) // copy stack home
- if s.f.pass.debug > moveSpills {
- s.f.Config.Warnl(e.Pos, "copied spill %v in %v for %v to %v in %v",
- vsp, b, e, vspnew, d)
- }
- } else {
- first = false
- vspnew.Block = d
- d.Values = append(d.Values, vspnew)
- if s.f.pass.debug > moveSpills {
- s.f.Config.Warnl(e.Pos, "moved spill %v in %v for %v to %v in %v",
- vsp, b, e, vspnew, d)
- }
- }
-
- // shuffle vspnew to the beginning of its block
- copy(d.Values[1:], d.Values[0:len(d.Values)-1])
- d.Values[0] = vspnew
-
- }
- }
-
// Erase any copies we never used.
// Also, an unused copy might be the only use of another copy,
// so continue erasing until we reach a fixed point.
}
b.Values = b.Values[:i]
}
+}
+
+func (s *regAllocState) placeSpills() {
+	f := s.f
- if f.pass.stats > 0 {
- f.LogStat("spills_info",
- nSpills, "spills", nSpillsInner, "inner_spills_remaining", nSpillsSunk, "inner_spills_sunk", nSpillsSunkUnused, "inner_spills_unused", nSpillsNotSunkLateUse, "inner_spills_shuffled", nSpillsChanged, "inner_spills_changed")
+	// Precompute some useful info.
+	phiRegs := make([]regMask, f.NumBlocks())
+	for _, b := range f.Blocks {
+		var m regMask
+		for _, v := range b.Values {
+			if v.Op != OpPhi {
+				break
+			}
+			if r, ok := f.getHome(v.ID).(*Register); ok {
+				m |= regMask(1) << uint(r.num)
+			}
+		}
+		phiRegs[b.ID] = m
+	}
-}
-// isLoopSpillCandidate indicates whether the spill for v satisfies preliminary
-// spill-sinking conditions just after the last block of loop has been processed.
-// In particular:
-// v needs a register.
-// v's spill is not (YET) used.
-// v's definition is within loop.
-// The spill may be used in the future, either by an outright use
-// in the code, or by shuffling code inserted after stack allocation.
-// Outright uses cause sinking; shuffling (within the loop) inhibits it.
-func (s *regAllocState) isLoopSpillCandidate(loop *loop, v *Value) bool {
- return s.values[v.ID].needReg && !s.values[v.ID].spillUsed && s.loopnest.b2l[v.Block.ID] == loop
-}
+	// Start maps block IDs to the list of spills
+	// that go at the start of the block (but after any phis).
+	start := map[ID][]*Value{}
+	// After maps value IDs to the list of spills
+	// that go immediately after that value ID.
+	after := map[ID][]*Value{}
+
+	for i := range s.values {
+		vi := s.values[i]
+		spill := vi.spill
+		if spill == nil {
+			continue
+		}
+		if spill.Block != nil {
+			// Some spills are already fully set up,
+			// like OpArgs and stack-based phis.
+			continue
+		}
+		v := s.orig[i]
+
+		// Walk down the dominator tree looking for a good place to
+		// put the spill of v. At the start "best" is the best place
+		// we have found so far.
+		// TODO: find a way to make this O(1) without arbitrary cutoffs.
+		best := v.Block
+		bestArg := v
+		var bestDepth int16
+		if l := s.loopnest.b2l[best.ID]; l != nil {
+			bestDepth = l.depth
+		}
+		b := best
+		const maxSpillSearch = 100
+		for i := 0; i < maxSpillSearch; i++ {
+			// Find the child of b in the dominator tree which
+			// dominates all restores.
+			p := b
+			b = nil
+			for c := s.sdom.Child(p); c != nil && i < maxSpillSearch; c, i = s.sdom.Sibling(c), i+1 {
+				if s.sdom[c.ID].entry <= vi.restoreMin && s.sdom[c.ID].exit >= vi.restoreMax {
+					// c also dominates all restores. Walk down into c.
+					b = c
+					break
+				}
+			}
+			if b == nil {
+				// Ran out of blocks which dominate all restores.
+				break
+			}
+
+			var depth int16
+			if l := s.loopnest.b2l[b.ID]; l != nil {
+				depth = l.depth
+			}
+			if depth > bestDepth {
+				// Don't push the spill into a deeper loop.
+				continue
+			}
-// lateSpillUse notes a late (after stack allocation) use of the spill of value with ID vid.
-// This will inhibit spill sinking.
-func (s *regAllocState) lateSpillUse(vid ID) {
- // TODO investigate why this is necessary.
- // It appears that an outside-the-loop use of
- // an otherwise sinkable spill makes the spill
- // a candidate for shuffling, when it would not
- // otherwise have been the case (spillUsed was not
- // true when isLoopSpillCandidate was called, yet
- // it was shuffled). Such shuffling cuts the amount
- // of spill sinking by more than half (in make.bash)
- s.values[vid].spillUsedShuffle = true
+			// If v is in a register at the start of b, we can
+			// place the spill here (after the phis).
+			if len(b.Preds) == 1 {
+				for _, e := range s.endRegs[b.Preds[0].b.ID] {
+					if e.v == v {
+						// Found a better spot for the spill.
+						best = b
+						bestArg = e.c
+						bestDepth = depth
+						break
+					}
+				}
+			} else {
+				for _, e := range s.startRegs[b.ID] {
+					if e.v == v {
+						// Found a better spot for the spill.
+						best = b
+						bestArg = e.c
+						bestDepth = depth
+						break
+					}
+				}
+			}
+		}
+
+		// Put the spill in the best block we found.
+		spill.Block = best
+		spill.AddArg(bestArg)
+		if best == v.Block && v.Op != OpPhi {
+			// Place immediately after v.
+			after[v.ID] = append(after[v.ID], spill)
+		} else {
+			// Place at the start of best block.
+			start[best.ID] = append(start[best.ID], spill)
+		}
+	}
+
+	// Insert spill instructions into the block schedules.
+	var oldSched []*Value
+	for _, b := range f.Blocks {
+		nphi := 0
+		for _, v := range b.Values {
+			if v.Op != OpPhi {
+				break
+			}
+			nphi++
+		}
+		oldSched = append(oldSched[:0], b.Values[nphi:]...)
+		b.Values = b.Values[:nphi]
+		for _, v := range start[b.ID] {
+			b.Values = append(b.Values, v)
+		}
+		for _, v := range oldSched {
+			b.Values = append(b.Values, v)
+			for _, w := range after[v.ID] {
+				b.Values = append(b.Values, w)
+			}
+		}
+	}
+}
// shuffle fixes up all the merge edges (those going into blocks of indegree > 1).
for _, spillID := range stacklive {
v := e.s.orig[spillID]
spill := e.s.values[v.ID].spill
+		if !e.s.sdom.isAncestorEq(spill.Block, e.p) {
+			// Spills were placed that only dominate the uses found
+			// during the first regalloc pass. The edge fixup code
+			// can't use a spill location if the spill doesn't dominate
+			// the edge.
+			// We are guaranteed that if the spill doesn't dominate this edge,
+			// then the value is available in a register (because we called
+			// makeSpill for every value not in a register at the start
+			// of an edge).
+			continue
+		}
e.set(e.s.f.getHome(spillID), v.ID, spill, false, src.NoXPos) // don't care the position of the source
}
// Figure out all the destinations we need.
dsts := e.destinations[:0]
for _, x := range dstReg {
- dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.vid, nil, x.pos})
+ dsts = append(dsts, dstRecord{&e.s.registers[x.r], x.v.ID, nil, x.pos})
}
// Phis need their args to end up in a specific location.
for _, v := range e.b.Values {
if _, isReg := loc.(*Register); isReg {
c = e.p.NewValue1(d.pos, OpCopy, c.Type, c)
} else {
- e.s.lateSpillUse(vid)
c = e.p.NewValue1(d.pos, OpLoadReg, c.Type, c)
}
e.set(r, vid, c, false, d.pos)
(*splice).Uses--
*splice = occupant.c
occupant.c.Uses++
- if occupant.c.Op == OpStoreReg {
- e.s.lateSpillUse(vid)
- }
}
// Note: if splice==nil then c will appear dead. This is
// non-SSA formed code, so be careful after this pass not to run
}
} else {
if dstReg {
- e.s.lateSpillUse(vid)
x = e.p.NewValue1(pos, OpLoadReg, c.Type, c)
} else {
// mem->mem. Use temp register.
e.erase(loc)
r := e.findRegFor(c.Type)
- e.s.lateSpillUse(vid)
t := e.p.NewValue1(pos, OpLoadReg, c.Type, c)
e.set(r, vid, t, false, pos)
x = e.p.NewValue1(pos, OpStoreReg, loc.(LocalSlot).Type, t)
d.addList(e.ID, e.regs)
}
}
+
+func min32(x, y int32) int32 {
+	if x < y {
+		return x
+	}
+	return y
+}
+func max32(x, y int32) int32 {
+	if x > y {
+		return x
+	}
+	return y
+}