[dev.ssa] cmd/internal/ssa: Add register allocation
author     Keith Randall <khr@golang.org>
           Tue, 5 May 2015 23:19:12 +0000 (16:19 -0700)
committer  Keith Randall <khr@golang.org>
           Thu, 7 May 2015 22:30:03 +0000 (22:30 +0000)
Add a simple register allocator.  It does only intra-basic-block
allocation, using a greedy one-pass scheme that treats the
register file as a cache.

Change-Id: Ib6b52f48270e08dfda98f2dd842b05afc3ab01ce
Reviewed-on: https://go-review.googlesource.com/9761
Reviewed-by: Alan Donovan <adonovan@google.com>
src/cmd/internal/ssa/block.go
src/cmd/internal/ssa/cgen.go
src/cmd/internal/ssa/compile.go
src/cmd/internal/ssa/critical.go
src/cmd/internal/ssa/location.go
src/cmd/internal/ssa/op.go
src/cmd/internal/ssa/op_string.go
src/cmd/internal/ssa/regalloc.go [new file with mode: 0644]
src/cmd/internal/ssa/sparseset.go
src/cmd/internal/ssa/stackalloc.go [new file with mode: 0644]
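
The commit message's "register file as a cache" scheme, reduced to a standalone sketch (illustrative only; the two-register file, types, and names below are made up, and the real pass in regalloc.go also consults liveness before spilling): each register either caches a stack-homed value or is free, a definition makes the register copy authoritative (dirty), and evicting a dirty register first writes its value back to its stack home.

package main

import "fmt"

// regSlot is one "cache line": which stack-homed value (if any) is cached
// in this register, and whether the stack copy is now stale.
type regSlot struct {
	val   int
	dirty bool
}

// event is one step of a block: either define v into a register or use v.
type event struct {
	v   int
	def bool
}

func main() {
	regs := []regSlot{{-1, false}, {-1, false}} // a two-register file
	next := 0                                   // round-robin victim; the real pass prefers free, then clean, registers

	find := func(v int) int {
		for i, r := range regs {
			if r.val == v {
				return i
			}
		}
		return -1
	}
	evict := func() int {
		i := next % len(regs)
		next++
		if regs[i].val >= 0 && regs[i].dirty {
			fmt.Println("spill", regs[i].val) // write the dirty copy back to its stack home
		}
		return i
	}

	for _, e := range []event{{1, true}, {2, true}, {1, false}, {3, true}, {2, false}} {
		switch {
		case e.def: // definition: the register copy is the live one
			i := evict()
			regs[i] = regSlot{val: e.v, dirty: true}
			fmt.Println("def  ", e.v)
		case find(e.v) >= 0: // use, already cached
			fmt.Println("hit  ", e.v)
		default: // use, not cached: reload from the stack home
			i := evict()
			regs[i] = regSlot{val: e.v, dirty: false}
			fmt.Println("load ", e.v)
		}
	}
}

The spill/load events printed here correspond to the OpStoreReg8 / OpLoadReg8 values that the per-block loop in regalloc.go emits.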

diff --git a/src/cmd/internal/ssa/block.go b/src/cmd/internal/ssa/block.go
index 81b5594f383698f15453a7cd503e01710a01aa1b..dcf3676bc20b833ac04685dbe134d7c1a229252b 100644 (file)
@@ -19,7 +19,7 @@ type Block struct {
        Kind BlockKind
 
        // Subsequent blocks, if any.  The number and order depend on the block kind.
-       // All blocks must be distinct (to make phi values in successors unambiguous).
+       // All successors must be distinct (to make phi values in successors unambiguous).
        Succs []*Block
 
        // Inverse of successors.
@@ -33,8 +33,9 @@ type Block struct {
        // has a memory control value.
        Control *Value
 
-       // The unordered set of Values contained in this block.
+       // The unordered set of Values that define the operation of this block.
        // The list must include the control value, if any. (TODO: need this last condition?)
+       // After the scheduling pass, this list is ordered.
        Values []*Value
 
        // The containing function
diff --git a/src/cmd/internal/ssa/cgen.go b/src/cmd/internal/ssa/cgen.go
index c13e715653f9b910c953637a74293049f51ebfc1..51c72aacd9910e77741d3a6b6af71f67a2d17aad 100644 (file)
@@ -18,9 +18,6 @@ func cgen(f *Func) {
 
        // TODO: prolog, allocate stack frame
 
-       // hack for now, until regalloc is done
-       f.RegAlloc = make([]Location, f.NumValues())
-
        for idx, b := range f.Blocks {
                fmt.Printf("%d:\n", b.ID)
                for _, v := range b.Values {
diff --git a/src/cmd/internal/ssa/compile.go b/src/cmd/internal/ssa/compile.go
index 08477d470c7e5b90bcbe926ce69674b24bacb8ee..c1f79567915d2935de16752d2bda1044233c309f 100644 (file)
@@ -63,8 +63,8 @@ var passes = [...]pass{
        {"critical", critical}, // remove critical edges
        {"layout", layout},     // schedule blocks
        {"schedule", schedule}, // schedule values
-       // regalloc
-       // stack slot alloc (+size stack frame)
+       {"regalloc", regalloc},
+       {"stackalloc", stackalloc},
        {"cgen", cgen},
 }
 
@@ -72,19 +72,26 @@ var passes = [...]pass{
 // This code is intended to document the ordering requirements
 // between different phases.  It does not override the passes
 // list above.
-var passOrder = map[string]string{
+type constraint struct {
+       a, b string // a must come before b
+}
+
+var passOrder = [...]constraint{
        // don't layout blocks until critical edges have been removed
-       "critical": "layout",
+       {"critical", "layout"},
        // regalloc requires the removal of all critical edges
-       //"critical": "regalloc",
+       {"critical", "regalloc"},
        // regalloc requires all the values in a block to be scheduled
-       //"schedule": "regalloc",
-       // code generation requires register allocation
-       //"regalloc": "cgen",
+       {"schedule", "regalloc"},
+       // stack allocation requires register allocation
+       {"regalloc", "stackalloc"},
+       // code generation requires stack allocation
+       {"stackalloc", "cgen"},
 }
 
 func init() {
-       for a, b := range passOrder {
+       for _, c := range passOrder {
+               a, b := c.a, c.b
                i := -1
                j := -1
                for k, p := range passes {
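
A minimal standalone sketch of what the init-time check over passOrder can look like (assumed shape and hypothetical names, not the code in compile.go): find each constraint's two passes in the passes list and require a to appear before b.

package main

import "log"

type pass struct{ name string }

type constraint struct{ a, b string } // a must come before b

var passes = []pass{
	{"critical"}, {"layout"}, {"schedule"}, {"regalloc"}, {"stackalloc"}, {"cgen"},
}

var passOrder = []constraint{
	{"critical", "layout"},
	{"critical", "regalloc"},
	{"schedule", "regalloc"},
	{"regalloc", "stackalloc"},
	{"stackalloc", "cgen"},
}

func main() {
	for _, c := range passOrder {
		i, j := -1, -1
		for k, p := range passes {
			if p.name == c.a {
				i = k
			}
			if p.name == c.b {
				j = k
			}
		}
		if i < 0 || j < 0 || i >= j {
			log.Fatalf("pass %q must run before %q", c.a, c.b)
		}
	}
	log.Print("pass order ok")
}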
diff --git a/src/cmd/internal/ssa/critical.go b/src/cmd/internal/ssa/critical.go
index 5bbad8f2f56bca8fd05f77e5645e5f068496f72c..503681ffd3b14b53367d18cb0e88f38b6fbce4da 100644 (file)
@@ -29,7 +29,7 @@ func critical(f *Func) {
                // split input edges coming from multi-output blocks.
                for i, c := range b.Preds {
                        if c.Kind == BlockPlain {
-                               continue
+                               continue // only single output block
                        }
 
                        // allocate a new block to place on the edge
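
A conceptual sketch of the property the critical pass maintains (simplified, with assumed field names rather than the package's own types): an edge c→b is critical when c has more than one successor and b has more than one predecessor, and regalloc requires such edges to be split by an empty block so edge-specific copies and spills have an unambiguous place to go.

package main

import "fmt"

type block struct {
	name         string
	succs, preds []*block
}

// isCritical reports whether the edge c->b is a critical edge.
func isCritical(c, b *block) bool {
	return len(c.succs) > 1 && len(b.preds) > 1
}

// split places a new empty block d on the edge c->b and rewires both ends.
func split(c, b *block) *block {
	d := &block{name: c.name + "->" + b.name, succs: []*block{b}, preds: []*block{c}}
	for i, s := range c.succs {
		if s == b {
			c.succs[i] = d
		}
	}
	for i, p := range b.preds {
		if p == c {
			b.preds[i] = d
		}
	}
	return d
}

func main() {
	a := &block{name: "a"}
	b := &block{name: "b"}
	j := &block{name: "join"}
	a.succs = []*block{b, j} // a branches
	b.succs = []*block{j}
	j.preds = []*block{a, b} // join has two preds, so a->join is critical
	if isCritical(a, j) {
		fmt.Println("split", split(a, j).name)
	}
}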
diff --git a/src/cmd/internal/ssa/location.go b/src/cmd/internal/ssa/location.go
index 94c1b426a2fe13aced6da506f7693040dec8c357..5fc2c5c93438acd519fe4a9f4600fb1cfdaa2533 100644 (file)
@@ -28,7 +28,7 @@ type LocalSlot struct {
 }
 
 func (s *LocalSlot) Name() string {
-       return fmt.Sprintf("loc%d", s.idx)
+       return fmt.Sprintf("-%d(FP)", s.idx)
 }
 
 // An ArgSlot is a location in the parents' stack frame where it passed us an argument.
diff --git a/src/cmd/internal/ssa/op.go b/src/cmd/internal/ssa/op.go
index 600dc9faa63d75fb708f7aa3d288497575b4fffd..2d60b929393b94112075fa01d0f18d3cc313fbd6 100644 (file)
@@ -127,6 +127,9 @@ const (
        OpMOVQstoreFP
        OpMOVQstoreSP
 
+       // materialize a constant into a register
+       OpMOVQconst
+
        OpMax // sentinel
 )
 
@@ -151,14 +154,13 @@ type regMask uint64
 
 var regs386 = [...]string{
        "AX",
-       "BX",
        "CX",
        "DX",
-       "SI",
-       "DI",
+       "BX",
        "SP",
        "BP",
-       "X0",
+       "SI",
+       "DI",
 
        // pseudo registers
        "FLAGS",
@@ -166,10 +168,10 @@ var regs386 = [...]string{
 }
 
 // TODO: match up these with regs386 above
-var gp regMask = 0xff
-var cx regMask = 0x4
-var flags regMask = 1 << 9
-var overwrite0 regMask = 1 << 10
+var gp regMask = 0xef
+var cx regMask = 0x2
+var flags regMask = 1 << 8
+var overwrite0 regMask = 1 << 9
 
 const (
        // possible properties of opcodes
@@ -177,20 +179,23 @@ const (
 
        // architecture constants
        Arch386
-       ArchAmd64
-       ArchArm
+       ArchAMD64
+       ArchARM
 )
 
 // general purpose registers, 2 input, 1 output
 var gp21 = [2][]regMask{{gp, gp}, {gp}}
-var gp21_overwrite = [2][]regMask{{gp, gp}, {overwrite0}}
+var gp21_overwrite = [2][]regMask{{gp, gp}, {gp}}
 
 // general purpose registers, 1 input, 1 output
 var gp11 = [2][]regMask{{gp}, {gp}}
-var gp11_overwrite = [2][]regMask{{gp}, {overwrite0}}
+var gp11_overwrite = [2][]regMask{{gp}, {gp}}
+
+// general purpose registers, 0 input, 1 output
+var gp01 = [2][]regMask{{}, {gp}}
 
 // shift operations
-var shift = [2][]regMask{{gp, cx}, {overwrite0}}
+var shift = [2][]regMask{{gp, cx}, {gp}}
 
 var gp2_flags = [2][]regMask{{gp, gp}, {flags}}
 var gp1_flags = [2][]regMask{{gp}, {flags}}
@@ -199,6 +204,9 @@ var gploadX = [2][]regMask{{gp, gp, 0}, {gp}} // indexed loads
 var gpstore = [2][]regMask{{gp, gp, 0}, {0}}
 var gpstoreX = [2][]regMask{{gp, gp, gp, 0}, {0}} // indexed stores
 
+var gpload_stack = [2][]regMask{{0}, {gp}}
+var gpstore_stack = [2][]regMask{{gp, 0}, {0}}
+
 // Opcodes that represent the input Go program
 var genericTable = [...]OpInfo{
        // the unknown op is used only during building and should not appear in a
@@ -284,6 +292,8 @@ var amd64Table = [...]OpInfo{
        OpMOVQload8:  {asm: "MOVQ\t%A(%I0)(%I1*8),%O0", reg: gploadX},
        OpMOVQstore8: {asm: "MOVQ\t%I2,%A(%I0)(%I1*8)", reg: gpstoreX},
 
+       OpMOVQconst: {asm: "MOVQ\t$%A,%O0", reg: gp01},
+
        OpStaticCall: {asm: "CALL\t%A(SB)"},
 
        OpCopy: {asm: "MOVQ\t%I0,%O0", reg: gp11},
@@ -292,17 +302,17 @@ var amd64Table = [...]OpInfo{
        OpSETL: {},
 
        // ops for load/store to stack
-       OpMOVQloadFP:  {asm: "MOVQ\t%A(FP),%O0"},
-       OpMOVQloadSP:  {asm: "MOVQ\t%A(SP),%O0"},
-       OpMOVQstoreFP: {asm: "MOVQ\t%I0,%A(FP)"},
-       OpMOVQstoreSP: {asm: "MOVQ\t%I0,%A(SP)"},
+       OpMOVQloadFP:  {asm: "MOVQ\t%A(FP),%O0", reg: gpload_stack},  // mem -> value
+       OpMOVQloadSP:  {asm: "MOVQ\t%A(SP),%O0", reg: gpload_stack},  // mem -> value
+       OpMOVQstoreFP: {asm: "MOVQ\t%I0,%A(FP)", reg: gpstore_stack}, // mem, value -> mem
+       OpMOVQstoreSP: {asm: "MOVQ\t%I0,%A(SP)", reg: gpstore_stack}, // mem, value -> mem
 
        // ops for spilling of registers
        // unlike regular loads & stores, these take no memory argument.
        // They are just like OpCopy but we use them during register allocation.
        // TODO: different widths, float
-       OpLoadReg8:  {asm: "MOVQ\t%I0,%O0", reg: gp11},
-       OpStoreReg8: {asm: "MOVQ\t%I0,%O0", reg: gp11},
+       OpLoadReg8:  {asm: "MOVQ\t%I0,%O0"},
+       OpStoreReg8: {asm: "MOVQ\t%I0,%O0"},
 }
 
 // A Table is a list of opcodes with a common set of flags.
@@ -313,7 +323,7 @@ type Table struct {
 
 var tables = []Table{
        {genericTable[:], 0},
-       {amd64Table[:], ArchAmd64}, // TODO: pick this dynamically
+       {amd64Table[:], ArchAMD64}, // TODO: pick this dynamically
 }
 
 // table of opcodes, indexed by opcode ID
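
The renumbered masks follow directly from the reordered regs386 table: bit i of a regMask selects register i, so gp = 0xef is every general-purpose register except SP (bit 4), cx = 0x2 is CX alone, and FLAGS moves down to bit 8. A tiny standalone check (not code from this commit; the trailing OVERWRITE entry is assumed, mirroring the registers table in regalloc.go below):

package main

import "fmt"

type regMask uint64

var regs386 = [...]string{
	"AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI",
	// pseudo registers
	"FLAGS", "OVERWRITE",
}

// names expands a mask into register names: bit i selects regs386[i].
func names(m regMask) []string {
	var out []string
	for i := range regs386 {
		if m&(1<<uint(i)) != 0 {
			out = append(out, regs386[i])
		}
	}
	return out
}

func main() {
	fmt.Println(names(0xef))   // [AX CX DX BX BP SI DI] -- gp, with SP (bit 4) excluded
	fmt.Println(names(0x2))    // [CX]                   -- cx
	fmt.Println(names(1 << 8)) // [FLAGS]                -- flags
}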
diff --git a/src/cmd/internal/ssa/op_string.go b/src/cmd/internal/ssa/op_string.go
index 5c42d22439eeaddd70ce8a6e490971f417a600db..c095fba52b9aed91abdcf4ceccafbd2b37e2dbf4 100644 (file)
@@ -4,9 +4,9 @@ package ssa
 
 import "fmt"
 
-const _Op_name = "OpUnknownOpNopOpFwdRefOpAddOpSubOpMulOpLessOpConstOpArgOpGlobalOpFuncOpCopyOpPhiOpSliceMakeOpSlicePtrOpSliceLenOpSliceCapOpStringMakeOpStringPtrOpStringLenOpSliceIndexOpSliceIndexAddrOpLoadOpStoreOpCheckNilOpCheckBoundOpCallOpStaticCallOpConvertOpConvNopOpFPAddrOpSPAddrOpStoreReg8OpLoadReg8OpADDQOpSUBQOpADDCQOpSUBCQOpMULQOpMULCQOpSHLQOpSHLCQOpNEGQOpCMPQOpCMPCQOpADDLOpTESTQOpSETEQOpSETNEOpSETLOpSETGEOpSETBOpInvertFlagsOpLEAQOpLEAQ2OpLEAQ4OpLEAQ8OpMOVQloadOpMOVQstoreOpMOVQload8OpMOVQstore8OpMOVQloadFPOpMOVQloadSPOpMOVQstoreFPOpMOVQstoreSPOpMax"
+const _Op_name = "OpUnknownOpNopOpFwdRefOpAddOpSubOpMulOpLessOpConstOpArgOpGlobalOpFuncOpCopyOpPhiOpSliceMakeOpSlicePtrOpSliceLenOpSliceCapOpStringMakeOpStringPtrOpStringLenOpSliceIndexOpSliceIndexAddrOpLoadOpStoreOpCheckNilOpCheckBoundOpCallOpStaticCallOpConvertOpConvNopOpFPAddrOpSPAddrOpStoreReg8OpLoadReg8OpADDQOpSUBQOpADDCQOpSUBCQOpMULQOpMULCQOpSHLQOpSHLCQOpNEGQOpCMPQOpCMPCQOpADDLOpTESTQOpSETEQOpSETNEOpSETLOpSETGEOpSETBOpInvertFlagsOpLEAQOpLEAQ2OpLEAQ4OpLEAQ8OpMOVQloadOpMOVQstoreOpMOVQload8OpMOVQstore8OpMOVQloadFPOpMOVQloadSPOpMOVQstoreFPOpMOVQstoreSPOpMOVQconstOpMax"
 
-var _Op_index = [...]uint16{0, 9, 14, 22, 27, 32, 37, 43, 50, 55, 63, 69, 75, 80, 91, 101, 111, 121, 133, 144, 155, 167, 183, 189, 196, 206, 218, 224, 236, 245, 254, 262, 270, 281, 291, 297, 303, 310, 317, 323, 330, 336, 343, 349, 355, 362, 368, 375, 382, 389, 395, 402, 408, 421, 427, 434, 441, 448, 458, 469, 480, 492, 504, 516, 529, 542, 547}
+var _Op_index = [...]uint16{0, 9, 14, 22, 27, 32, 37, 43, 50, 55, 63, 69, 75, 80, 91, 101, 111, 121, 133, 144, 155, 167, 183, 189, 196, 206, 218, 224, 236, 245, 254, 262, 270, 281, 291, 297, 303, 310, 317, 323, 330, 336, 343, 349, 355, 362, 368, 375, 382, 389, 395, 402, 408, 421, 427, 434, 441, 448, 458, 469, 480, 492, 504, 516, 529, 542, 553, 558}
 
 func (i Op) String() string {
        if i < 0 || i+1 >= Op(len(_Op_index)) {
diff --git a/src/cmd/internal/ssa/regalloc.go b/src/cmd/internal/ssa/regalloc.go
new file mode 100644 (file)
index 0000000..724a055
--- /dev/null
@@ -0,0 +1,421 @@
+package ssa
+
+import (
+       "fmt"
+       "log"
+       "sort"
+)
+
+func setloc(home []Location, v *Value, loc Location) []Location {
+       for v.ID >= ID(len(home)) {
+               home = append(home, nil)
+       }
+       home[v.ID] = loc
+       return home
+}
+
+type register uint
+
+// TODO: make arch-dependent
+var numRegs register = 32
+
+var registers = [...]Register{
+       Register{"AX"},
+       Register{"CX"},
+       Register{"DX"},
+       Register{"BX"},
+       Register{"SP"},
+       Register{"BP"},
+       Register{"SI"},
+       Register{"DI"},
+
+       // TODO R8, X0, ...
+       // TODO: make arch-dependent
+       Register{"FLAGS"},
+       Register{"OVERWRITE"},
+}
+
+// countRegs returns the number of set bits in the register mask.
+func countRegs(r regMask) int {
+       n := 0
+       for r != 0 {
+               n += int(r & 1)
+               r >>= 1
+       }
+       return n
+}
+
+// pickReg picks an arbitrary register from the register mask.
+func pickReg(r regMask) register {
+       // pick the lowest one
+       if r == 0 {
+               panic("can't pick a register from an empty set")
+       }
+       for i := register(0); ; i++ {
+               if r&1 != 0 {
+                       return i
+               }
+               r >>= 1
+       }
+}
+
+// regalloc performs register allocation on f.  It sets f.RegAlloc
+// to the resulting allocation.
+func regalloc(f *Func) {
+       // For now, a very simple allocator.  Everything has a home
+       // location on the stack (TBD as a subsequent stackalloc pass).
+       // Values live in the home locations at basic block boundaries.
+       // We use a simple greedy allocator within a basic block.
+       home := make([]Location, f.NumValues())
+
+       addPhiCopies(f) // add copies of phi inputs in preceding blocks
+
+       // Compute live values at the end of each block.
+       live := live(f)
+       lastUse := make([]int, f.NumValues())
+
+       var oldSched []*Value
+
+       // Register allocate each block separately.  All live values will live
+       // in home locations (stack slots) between blocks.
+       for _, b := range f.Blocks {
+
+               // Compute the index of the last use of each Value in the Block.
+               // Scheduling has already happened, so Values are totally ordered.
+               // lastUse[x] = max(i) where b.Value[i] uses Value x.
+               for i, v := range b.Values {
+                       lastUse[v.ID] = -1
+                       for _, w := range v.Args {
+                               // could condition this store on w.Block == b, but no need
+                               lastUse[w.ID] = i
+                       }
+               }
+               // Values which are live at block exit have a lastUse of len(b.Values).
+               if b.Control != nil {
+                       lastUse[b.Control.ID] = len(b.Values)
+               }
+               // Values live after block exit have a lastUse of len(b.Values)+1.
+               for _, vid := range live[b.ID] {
+                       lastUse[vid] = len(b.Values) + 1
+               }
+
+               // For each register, store which value it contains
+               type regInfo struct {
+                       v     *Value // stack-homed original value (or nil if empty)
+                       c     *Value // the register copy of v
+                       dirty bool   // if the stack-homed copy is out of date
+               }
+               regs := make([]regInfo, numRegs)
+
+               var used regMask  // has a 1 for each non-nil entry in regs
+               var dirty regMask // has a 1 for each dirty entry in regs
+
+               oldSched = append(oldSched[:0], b.Values...)
+               b.Values = b.Values[:0]
+
+               for idx, v := range oldSched {
+                       // For each instruction, do:
+                       //   set up inputs to v in registers
+                       //   pick output register
+                       //   run insn
+                       //   mark output register as dirty
+                       // Note that v represents the Value at "home" (on the stack), and c
+                       // is its register equivalent.  There are two ways to establish c:
+                       //   - use of v.  c will be a load from v's home.
+                       //   - definition of v.  c will be identical to v but will live in
+                       //     a register.  v will be modified into a spill of c.
+                       regspec := opcodeTable[v.Op].reg
+                       if v.Op == OpConvNop {
+                               regspec = opcodeTable[v.Args[0].Op].reg
+                       }
+                       inputs := regspec[0]
+                       outputs := regspec[1]
+                       if len(inputs) == 0 && len(outputs) == 0 {
+                               // No register allocation required (or none specified yet)
+                               b.Values = append(b.Values, v)
+                               continue
+                       }
+
+                       // Compute a good input ordering.  Start with the most constrained input.
+                       order := make([]intPair, len(inputs))
+                       for i, input := range inputs {
+                               order[i] = intPair{countRegs(input), i}
+                       }
+                       sort.Sort(byKey(order))
+
+                       // nospill contains registers that we can't spill because
+                       // we already set them up for use by the current instruction.
+                       var nospill regMask
+
+                       // Move inputs into registers
+                       for _, o := range order {
+                               w := v.Args[o.val]
+                               mask := inputs[o.val]
+                               if mask == 0 {
+                                       // Input doesn't need a register
+                                       continue
+                               }
+                               // TODO: 2-address overwrite instructions
+
+                               // Find registers that w is already in
+                               var wreg regMask
+                               for r := register(0); r < numRegs; r++ {
+                                       if regs[r].v == w {
+                                               wreg |= regMask(1) << r
+                                       }
+                               }
+
+                               var r register
+                               if mask&wreg != 0 {
+                                       // w is already in an allowed register.  We're done.
+                                       r = pickReg(mask & wreg)
+                               } else {
+                                       // Pick a register for w
+                                       // Priorities (in order)
+                                       //  - an unused register
+                                       //  - a clean register
+                                       //  - a dirty register
+                                       // TODO: for used registers, pick the one whose next use is the
+                                       // farthest in the future.
+                                       mask &^= nospill
+                                       if mask & ^dirty != 0 {
+                                               mask &^= dirty
+                                       }
+                                       if mask & ^used != 0 {
+                                               mask &^= used
+                                       }
+                                       r = pickReg(mask)
+
+                                       // Kick out whomever is using this register.
+                                       if regs[r].v != nil {
+                                               x := regs[r].v
+                                               c := regs[r].c
+                                               if regs[r].dirty && lastUse[x.ID] > idx {
+                                                       // Write x back to home.  Its value is currently held in c.
+                                                       x.Op = OpStoreReg8
+                                                       x.Aux = nil
+                                                       x.resetArgs()
+                                                       x.AddArg(c)
+                                                       b.Values = append(b.Values, x)
+                                                       regs[r].dirty = false
+                                                       dirty &^= regMask(1) << r
+                                               }
+                                               regs[r].v = nil
+                                               regs[r].c = nil
+                                               used &^= regMask(1) << r
+                                       }
+
+                                       // Load w into this register
+                                       var c *Value
+                                       if w.Op == OpConst {
+                                               // Materialize w
+                                               // TODO: arch-specific MOV op
+                                               c = b.NewValue(OpMOVQconst, w.Type, w.Aux)
+                                       } else if wreg != 0 {
+                                               // Copy from another register.
+                                               // Typically just an optimization, but this is
+                                               // required if w is dirty.
+                                               s := pickReg(wreg)
+                                               // inv: s != r
+                                               c = b.NewValue(OpCopy, w.Type, nil)
+                                               c.AddArg(regs[s].c)
+                                       } else {
+                                               // Load from home location
+                                               c = b.NewValue(OpLoadReg8, w.Type, nil)
+                                               c.AddArg(w)
+                                       }
+                                       home = setloc(home, c, &registers[r])
+                                       // Remember what we did
+                                       regs[r].v = w
+                                       regs[r].c = c
+                                       regs[r].dirty = false
+                                       used |= regMask(1) << r
+                               }
+
+                               // Replace w with its in-register copy.
+                               v.SetArg(o.val, regs[r].c)
+
+                               // Remember not to undo this register assignment until after
+                               // the instruction is issued.
+                               nospill |= regMask(1) << r
+                       }
+
+                       // pick a register for v itself.
+                       if len(outputs) > 1 {
+                               panic("can't do multi-output yet")
+                       }
+                       if len(outputs) == 0 || outputs[0] == 0 {
+                               // output doesn't need a register
+                               b.Values = append(b.Values, v)
+                       } else {
+                               mask := outputs[0]
+                               if mask & ^dirty != 0 {
+                                       mask &^= dirty
+                               }
+                               if mask & ^used != 0 {
+                                       mask &^= used
+                               }
+                               r := pickReg(mask)
+
+                               // Kick out whomever is using this register.
+                               if regs[r].v != nil {
+                                       x := regs[r].v
+                                       c := regs[r].c
+                                       if regs[r].dirty && lastUse[x.ID] > idx {
+                                               // Write x back to home.  Its value is currently held in c.
+                                               x.Op = OpStoreReg8
+                                               x.Aux = nil
+                                               x.resetArgs()
+                                               x.AddArg(c)
+                                               b.Values = append(b.Values, x)
+                                               regs[r].dirty = false
+                                               dirty &^= regMask(1) << r
+                                       }
+                                       regs[r].v = nil
+                                       regs[r].c = nil
+                                       used &^= regMask(1) << r
+                               }
+
+                               // Reissue v with new op, with r as its home.
+                               c := b.NewValue(v.Op, v.Type, v.Aux)
+                               c.AddArgs(v.Args...)
+                               home = setloc(home, c, &registers[r])
+
+                               // Remember what we did
+                               regs[r].v = v
+                               regs[r].c = c
+                               regs[r].dirty = true
+                               used |= regMask(1) << r
+                               dirty |= regMask(1) << r
+                       }
+               }
+
+               // If the block ends in a call, we must put the call after the spill code.
+               var call *Value
+               if b.Kind == BlockCall {
+                       call = b.Control
+                       if call != b.Values[len(b.Values)-1] {
+                               log.Fatalf("call not at end of block %b %v", b, call)
+                       }
+                       b.Values = b.Values[:len(b.Values)-1]
+                       // TODO: do this for all control types?
+               }
+
+               // at the end of the block, spill any remaining dirty, live values
+               for r := register(0); r < numRegs; r++ {
+                       if !regs[r].dirty {
+                               continue
+                       }
+                       v := regs[r].v
+                       c := regs[r].c
+                       if lastUse[v.ID] <= len(oldSched) {
+                               continue // not live after block
+                       }
+
+                       // change v to be a copy of c
+                       v.Op = OpStoreReg8
+                       v.Aux = nil
+                       v.resetArgs()
+                       v.AddArg(c)
+                       b.Values = append(b.Values, v)
+               }
+
+               // add call back after spills
+               if b.Kind == BlockCall {
+                       b.Values = append(b.Values, call)
+               }
+       }
+       f.RegAlloc = home
+}
+
+// addPhiCopies adds copies of phi inputs in the blocks
+// immediately preceding the phi's block.
+func addPhiCopies(f *Func) {
+       for _, b := range f.Blocks {
+               for _, v := range b.Values {
+                       if v.Op != OpPhi {
+                               break // all phis should appear first
+                       }
+                       if v.Type.IsMemory() { // TODO: only "regallocable" types
+                               continue
+                       }
+                       for i, w := range v.Args {
+                               c := b.Preds[i]
+                               cpy := c.NewValue1(OpCopy, v.Type, nil, w)
+                               v.Args[i] = cpy
+                       }
+               }
+       }
+}
+
+// live returns a map from block ID to a list of value IDs live at the end of that block
+// TODO: this could be quadratic if lots of variables are live across lots of
+// basic blocks.  Figure out a way to make this function (or, more precisely, the user
+// of this function) require only linear size & time.
+func live(f *Func) [][]ID {
+       live := make([][]ID, f.NumBlocks())
+       var phis []*Value
+
+       s := newSparseSet(f.NumValues())
+       t := newSparseSet(f.NumValues())
+       for {
+               for _, b := range f.Blocks {
+                       fmt.Printf("live %s %v\n", b, live[b.ID])
+               }
+               changed := false
+
+               for _, b := range f.Blocks {
+                       // Start with known live values at the end of the block
+                       s.clear()
+                       s.addAll(live[b.ID])
+
+                       // Propagate backwards to the start of the block
+                       // Assumes Values have been scheduled.
+                       phis := phis[:0]
+                       for i := len(b.Values) - 1; i >= 0; i-- {
+                               v := b.Values[i]
+                               s.remove(v.ID)
+                               if v.Op == OpPhi {
+                                       // save phi ops for later
+                                       phis = append(phis, v)
+                                       continue
+                               }
+                               s.addAllValues(v.Args)
+                       }
+
+                       // for each predecessor of b, expand its list of live-at-end values
+                       // inv: s contains the values live at the start of b (excluding phi inputs)
+                       for i, p := range b.Preds {
+                               t.clear()
+                               t.addAll(live[p.ID])
+                               t.addAll(s.contents())
+                               for _, v := range phis {
+                                       t.add(v.Args[i].ID)
+                               }
+                               if t.size() == len(live[p.ID]) {
+                                       continue
+                               }
+                               // grow p's live set
+                               c := make([]ID, t.size())
+                               copy(c, t.contents())
+                               live[p.ID] = c
+                               changed = true
+                       }
+               }
+
+               if !changed {
+                       break
+               }
+       }
+       return live
+}
+
+// for sorting a pair of integers by key
+type intPair struct {
+       key, val int
+}
+type byKey []intPair
+
+func (a byKey) Len() int           { return len(a) }
+func (a byKey) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
+func (a byKey) Less(i, j int) bool { return a[i].key < a[j].key }
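
A standalone illustration of the most-constrained-input-first ordering used above (countRegs, intPair, and byKey are copied in shape from regalloc.go; the masks are example values for a shift, whose count operand must sit in CX):

package main

import (
	"fmt"
	"sort"
)

type regMask uint64

// countRegs returns the number of set bits in the register mask
// (same as the helper in regalloc.go).
func countRegs(r regMask) int {
	n := 0
	for r != 0 {
		n += int(r & 1)
		r >>= 1
	}
	return n
}

type intPair struct{ key, val int }

type byKey []intPair

func (a byKey) Len() int           { return len(a) }
func (a byKey) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a byKey) Less(i, j int) bool { return a[i].key < a[j].key }

func main() {
	// Example masks for a shift: input 0 may use any GP register (0xef),
	// input 1 (the shift count) must be in CX (0x2).
	inputs := []regMask{0xef, 0x2}
	order := make([]intPair, len(inputs))
	for i, in := range inputs {
		order[i] = intPair{countRegs(in), i}
	}
	sort.Sort(byKey(order))
	fmt.Println(order) // [{1 1} {7 0}]: set up the CX-only input first
}

Sorting by ascending register count means the CX-only operand claims its register before an unconstrained operand can take CX.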
diff --git a/src/cmd/internal/ssa/sparseset.go b/src/cmd/internal/ssa/sparseset.go
index e1f9a9a81d5a87cbdfa6051dbbaa92a1b9e4eb96..b79aee84977894c242d5a1ca71340853578006c6 100644 (file)
@@ -28,9 +28,24 @@ func (s *sparseSet) contains(x ID) bool {
 }
 
 func (s *sparseSet) add(x ID) {
-       i := len(s.dense)
+       i := s.sparse[x]
+       if i < len(s.dense) && s.dense[i] == x {
+               return
+       }
        s.dense = append(s.dense, x)
-       s.sparse[x] = i
+       s.sparse[x] = len(s.dense) - 1
+}
+
+func (s *sparseSet) addAll(a []ID) {
+       for _, x := range a {
+               s.add(x)
+       }
+}
+
+func (s *sparseSet) addAllValues(a []*Value) {
+       for _, v := range a {
+               s.add(v.ID)
+       }
 }
 
 func (s *sparseSet) remove(x ID) {
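
The new add is the classic sparse-set membership trick: sparse[x] names a slot in dense, and x is a member only if that slot really holds x, so duplicate adds are rejected in O(1) and stale sparse entries are harmless. A minimal self-contained sketch with assumed field layouts (plain ints instead of the package's ID type):

package main

import "fmt"

type sparseSet struct {
	dense  []int
	sparse []int
}

func newSparseSet(n int) *sparseSet {
	return &sparseSet{sparse: make([]int, n)}
}

// contains checks that sparse[x] points at a live dense slot holding x.
func (s *sparseSet) contains(x int) bool {
	i := s.sparse[x]
	return i < len(s.dense) && s.dense[i] == x
}

func (s *sparseSet) add(x int) {
	if s.contains(x) {
		return // already present, as in the new add above
	}
	s.dense = append(s.dense, x)
	s.sparse[x] = len(s.dense) - 1
}

func main() {
	s := newSparseSet(10)
	s.add(3)
	s.add(3) // duplicate: ignored
	s.add(7)
	fmt.Println(len(s.dense), s.contains(3), s.contains(5)) // 2 true false
}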
diff --git a/src/cmd/internal/ssa/stackalloc.go b/src/cmd/internal/ssa/stackalloc.go
new file mode 100644 (file)
index 0000000..aa6d829
--- /dev/null
@@ -0,0 +1,51 @@
+package ssa
+
+// stackalloc allocates storage in the stack frame for
+// all Values that did not get a register.
+func stackalloc(f *Func) {
+       home := f.RegAlloc
+
+       var n int64 = 8 // 8 = space for return address.  TODO: arch-dependent
+
+       // Assign stack locations to phis first, because we
+       // must also assign the same locations to the phi copies
+       // introduced during regalloc.
+       for _, b := range f.Blocks {
+               for _, v := range b.Values {
+                       if v.Op != OpPhi {
+                               continue
+                       }
+                       n += v.Type.Size()
+                       // a := v.Type.Align()
+                       // n = (n + a - 1) / a * a  TODO
+                       loc := &LocalSlot{n}
+                       home = setloc(home, v, loc)
+                       for _, w := range v.Args {
+                               home = setloc(home, w, loc)
+                       }
+               }
+       }
+
+       // Now do all other unassigned values.
+       for _, b := range f.Blocks {
+               for _, v := range b.Values {
+                       if v.ID < ID(len(home)) && home[v.ID] != nil {
+                               continue
+                       }
+                       if v.Type.IsMemory() { // TODO: only "regallocable" types
+                               continue
+                       }
+                       // a := v.Type.Align()
+                       // n = (n + a - 1) / a * a  TODO
+                       n += v.Type.Size()
+                       loc := &LocalSlot{n}
+                       home = setloc(home, v, loc)
+               }
+       }
+       f.RegAlloc = home
+
+       // TODO: share stack slots among noninterfering (& gc type compatible) values
+       // TODO: align final n
+       // TODO: compute total frame size: n + max paramout space
+       // TODO: save total size somewhere
+}