Cypherpunks repositories - gostls13.git/commitdiff
[dev.ssa] cmd/internal/ssa: Handle more instructions + some cleanup
author    Keith Randall <khr@golang.org>
          Mon, 18 May 2015 23:44:20 +0000 (16:44 -0700)
committer Keith Randall <khr@golang.org>
          Wed, 27 May 2015 15:53:54 +0000 (15:53 +0000)
Add & as an input op.  Add several output ops (loads & stores, TESTB,
LEAQglobal, branches, memcopy).

Some other small things:
- Add an addr helper to the builder to generate addresses of expressions.
  Use it in various places that had ad-hoc code.
- Split nil & bounds check generation into separate functions.
- Add explicit FP and SP ops so we don't need specialized *FP and *SP opcodes.
- Fix fallthrough at end of functions with no return values.
- Allow rematerialization of more opcodes.
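
For orientation, a minimal sketch of how the new pieces fit together (using
the helper names from the patch below; the sketch itself is not part of the
commit).  An assignment whose left-hand side is not SSA-able is now lowered
by taking its address and emitting a store, with the bounds checks generated
inside the addr helper:

	// Sketch only: n is an OAS node, s is the builder state.
	val := s.expr(n.Right) // evaluate the right-hand side
	addr := s.addr(n.Left) // address of the LHS; bounds checks happen in addr
	// thread the store through the ".mem" variable
	s.vars[".mem"] = s.curBlock.NewValue3(ssa.OpStore, ssa.TypeMem, nil, addr, val, s.mem())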

Change-Id: I781decfcef9770fb15f0cd6b061547f7824a2d5e
Reviewed-on: https://go-review.googlesource.com/10213
Reviewed-by: Alan Donovan <adonovan@google.com>
16 files changed:
src/cmd/internal/gc/ssa.go
src/cmd/internal/obj/x86/6.out.go
src/cmd/internal/ssa/check.go
src/cmd/internal/ssa/config.go
src/cmd/internal/ssa/generic.go
src/cmd/internal/ssa/lower.go
src/cmd/internal/ssa/lowerAmd64.go
src/cmd/internal/ssa/op.go
src/cmd/internal/ssa/op_string.go
src/cmd/internal/ssa/opamd64.go
src/cmd/internal/ssa/regalloc.go
src/cmd/internal/ssa/rewrite.go
src/cmd/internal/ssa/rulegen/generic.rules
src/cmd/internal/ssa/rulegen/lower_amd64.rules
src/cmd/internal/ssa/rulegen/rulegen.go
src/cmd/internal/ssa/stackalloc.go

src/cmd/internal/gc/ssa.go
index ec6ad8abcb4d6d2aa0b1c0cccfe1c2bfb974bf5a..8e81163ad4dc80ad2f52bb8653e799946f25d4bd 100644 (file)
@@ -15,7 +15,7 @@ import (
 func buildssa(fn *Node) *ssa.Func {
        dumplist("buildssa", Curfn.Nbody)
 
-       var s ssaState
+       var s state
 
        // TODO(khr): build config just once at the start of the compiler binary
        s.config = ssa.NewConfig(Thearch.Thestring)
@@ -33,8 +33,10 @@ func buildssa(fn *Node) *ssa.Func {
        // Allocate exit block
        s.exit = s.f.NewBlock(ssa.BlockExit)
 
-       // TODO(khr): all args.  Make a struct containing args/returnvals, declare
-       // an FP which contains a pointer to that struct.
+       // Allocate starting values
+       s.startmem = s.f.Entry.NewValue(ssa.OpArg, ssa.TypeMem, ".mem")
+       s.fp = s.f.Entry.NewValue(ssa.OpFP, s.config.Uintptr, nil) // TODO: use generic pointer type (unsafe.Pointer?) instead
+       s.sp = s.f.Entry.NewValue(ssa.OpSP, s.config.Uintptr, nil)
 
        s.vars = map[string]*ssa.Value{}
        s.labels = map[string]*ssa.Block{}
@@ -44,6 +46,11 @@ func buildssa(fn *Node) *ssa.Func {
        s.startBlock(s.f.Entry)
        s.stmtList(fn.Nbody)
 
+       // fallthrough to exit
+       if b := s.endBlock(); b != nil {
+               addEdge(b, s.exit)
+       }
+
        // Finish up exit block
        s.startBlock(s.exit)
        s.exit.Control = s.mem()
@@ -58,7 +65,7 @@ func buildssa(fn *Node) *ssa.Func {
        return s.f
 }
 
-type ssaState struct {
+type state struct {
        // configuration (arch) information
        config *ssa.Config
 
@@ -83,10 +90,18 @@ type ssaState struct {
        // offsets of argument slots
        // unnamed and unused args are not listed.
        argOffsets map[string]int64
+
+       // starting values.  Memory, frame pointer, and stack pointer
+       startmem *ssa.Value
+       fp       *ssa.Value
+       sp       *ssa.Value
 }
 
 // startBlock sets the current block we're generating code in to b.
-func (s *ssaState) startBlock(b *ssa.Block) {
+func (s *state) startBlock(b *ssa.Block) {
+       if s.curBlock != nil {
+               log.Fatalf("starting block %v when block %v has not ended", b, s.curBlock)
+       }
        s.curBlock = b
        s.vars = map[string]*ssa.Value{}
 }
@@ -94,7 +109,7 @@ func (s *ssaState) startBlock(b *ssa.Block) {
 // endBlock marks the end of generating code for the current block.
 // Returns the (former) current block.  Returns nil if there is no current
 // block, i.e. if no code flows to the current execution point.
-func (s *ssaState) endBlock() *ssa.Block {
+func (s *state) endBlock() *ssa.Block {
        b := s.curBlock
        if b == nil {
                return nil
@@ -109,14 +124,14 @@ func (s *ssaState) endBlock() *ssa.Block {
 }
 
 // ssaStmtList converts the statement n to SSA and adds it to s.
-func (s *ssaState) stmtList(l *NodeList) {
+func (s *state) stmtList(l *NodeList) {
        for ; l != nil; l = l.Next {
                s.stmt(l.N)
        }
 }
 
 // ssaStmt converts the statement n to SSA and adds it to s.
-func (s *ssaState) stmt(n *Node) {
+func (s *state) stmt(n *Node) {
        s.stmtList(n.Ninit)
        switch n.Op {
 
@@ -145,35 +160,15 @@ func (s *ssaState) stmt(n *Node) {
        case OAS:
                // TODO(khr): colas?
                val := s.expr(n.Right)
-               if n.Left.Op == OINDREG {
-                       // indirect off a register (TODO: always SP?)
-                       // used for storing arguments to callees
-                       addr := s.f.Entry.NewValue(ssa.OpSPAddr, Ptrto(n.Right.Type), n.Left.Xoffset)
-                       s.vars[".mem"] = s.curBlock.NewValue3(ssa.OpStore, ssa.TypeMem, nil, addr, val, s.mem())
-               } else if n.Left.Op != ONAME {
-                       // some more complicated expression.  Rewrite to a store.  TODO
-                       addr := s.expr(n.Left) // TODO: wrap in &
-
-                       // TODO(khr): nil check
-                       s.vars[".mem"] = s.curBlock.NewValue3(ssa.OpStore, n.Right.Type, nil, addr, val, s.mem())
-               } else if !n.Left.Addable {
-                       // TODO
-                       log.Fatalf("assignment to non-addable value")
-               } else if n.Left.Class&PHEAP != 0 {
-                       // TODO
-                       log.Fatalf("assignment to heap value")
-               } else if n.Left.Class == PEXTERN {
-                       // assign to global variable
-                       addr := s.f.Entry.NewValue(ssa.OpGlobal, Ptrto(n.Left.Type), n.Left.Sym)
-                       s.vars[".mem"] = s.curBlock.NewValue3(ssa.OpStore, ssa.TypeMem, nil, addr, val, s.mem())
-               } else if n.Left.Class == PPARAMOUT {
-                       // store to parameter slot
-                       addr := s.f.Entry.NewValue(ssa.OpFPAddr, Ptrto(n.Right.Type), n.Left.Xoffset)
-                       s.vars[".mem"] = s.curBlock.NewValue3(ssa.OpStore, ssa.TypeMem, nil, addr, val, s.mem())
-               } else {
-                       // normal variable
+               if n.Left.Op == ONAME && !n.Left.Addrtaken && n.Left.Class&PHEAP == 0 && n.Left.Class != PEXTERN && n.Left.Class != PPARAMOUT {
+                       // ssa-able variable.
                        s.vars[n.Left.Sym.Name] = val
+                       return
                }
+               // not ssa-able.  Treat as a store.
+               addr := s.addr(n.Left)
+               s.vars[".mem"] = s.curBlock.NewValue3(ssa.OpStore, ssa.TypeMem, nil, addr, val, s.mem())
+               // TODO: try to make more variables registerizeable.
        case OIF:
                cond := s.expr(n.Ntest)
                b := s.endBlock()
@@ -254,7 +249,7 @@ func (s *ssaState) stmt(n *Node) {
 }
 
 // expr converts the expression n to ssa, adds it to s and returns the ssa result.
-func (s *ssaState) expr(n *Node) *ssa.Value {
+func (s *state) expr(n *Node) *ssa.Value {
        if n == nil {
                // TODO(khr): is this nil???
                return s.f.Entry.NewValue(ssa.OpConst, n.Type, nil)
@@ -269,7 +264,6 @@ func (s *ssaState) expr(n *Node) *ssa.Value {
                }
                s.argOffsets[n.Sym.Name] = n.Xoffset
                return s.variable(n.Sym.Name, n.Type)
-               // binary ops
        case OLITERAL:
                switch n.Val.Ctype {
                case CTINT:
@@ -278,6 +272,8 @@ func (s *ssaState) expr(n *Node) *ssa.Value {
                        log.Fatalf("unhandled OLITERAL %v", n.Val.Ctype)
                        return nil
                }
+
+               // binary ops
        case OLT:
                a := s.expr(n.Left)
                b := s.expr(n.Right)
@@ -286,56 +282,36 @@ func (s *ssaState) expr(n *Node) *ssa.Value {
                a := s.expr(n.Left)
                b := s.expr(n.Right)
                return s.curBlock.NewValue2(ssa.OpAdd, a.Type, nil, a, b)
-
        case OSUB:
                // TODO:(khr) fold code for all binary ops together somehow
                a := s.expr(n.Left)
                b := s.expr(n.Right)
                return s.curBlock.NewValue2(ssa.OpSub, a.Type, nil, a, b)
 
+       case OADDR:
+               return s.addr(n.Left)
+
        case OIND:
                p := s.expr(n.Left)
-               c := s.curBlock.NewValue1(ssa.OpIsNonNil, ssa.TypeBool, nil, p)
-               b := s.endBlock()
-               b.Kind = ssa.BlockIf
-               b.Control = c
-               bNext := s.f.NewBlock(ssa.BlockPlain)
-               addEdge(b, bNext)
-               addEdge(b, s.exit)
-               s.startBlock(bNext)
-               // TODO(khr): if ptr check fails, don't go directly to exit.
-               // Instead, go to a call to panicnil or something.
-               // TODO: implicit nil checks somehow?
-
+               s.nilCheck(p)
                return s.curBlock.NewValue2(ssa.OpLoad, n.Type, nil, p, s.mem())
+
        case ODOTPTR:
                p := s.expr(n.Left)
-               // TODO: nilcheck
-               p = s.curBlock.NewValue2(ssa.OpAdd, p.Type, nil, p, s.f.ConstInt(s.config.UIntPtr, n.Xoffset))
+               s.nilCheck(p)
+               p = s.curBlock.NewValue2(ssa.OpAdd, p.Type, nil, p, s.f.ConstInt(s.config.Uintptr, n.Xoffset))
                return s.curBlock.NewValue2(ssa.OpLoad, n.Type, nil, p, s.mem())
 
        case OINDEX:
-               // TODO: slice vs array?  Map index is already reduced to a function call
-               a := s.expr(n.Left)
-               i := s.expr(n.Right)
-               // convert index to full width
-               // TODO: if index is 64-bit and we're compiling to 32-bit, check that high
-               // 32 bits are zero (and use a low32 op instead of convnop here).
-               i = s.curBlock.NewValue1(ssa.OpConvNop, s.config.UIntPtr, nil, i)
-
-               // bounds check
-               len := s.curBlock.NewValue1(ssa.OpSliceLen, s.config.UIntPtr, nil, a)
-               cmp := s.curBlock.NewValue2(ssa.OpIsInBounds, ssa.TypeBool, nil, i, len)
-               b := s.endBlock()
-               b.Kind = ssa.BlockIf
-               b.Control = cmp
-               bNext := s.f.NewBlock(ssa.BlockPlain)
-               addEdge(b, bNext)
-               addEdge(b, s.exit)
-               s.startBlock(bNext)
-               // TODO: don't go directly to s.exit.  Go to a stub that calls panicindex first.
-
-               return s.curBlock.NewValue3(ssa.OpSliceIndex, n.Left.Type.Type, nil, a, i, s.mem())
+               if n.Left.Type.Bound >= 0 { // array
+                       a := s.expr(n.Left)
+                       i := s.expr(n.Right)
+                       s.boundsCheck(i, s.f.ConstInt(s.config.Uintptr, n.Left.Type.Bound))
+                       return s.curBlock.NewValue2(ssa.OpArrayIndex, n.Left.Type.Type, nil, a, i)
+               } else { // slice
+                       p := s.addr(n)
+                       return s.curBlock.NewValue2(ssa.OpLoad, n.Left.Type.Type, nil, p, s.mem())
+               }
 
        case OCALLFUNC:
                // run all argument assignments
@@ -359,7 +335,7 @@ func (s *ssaState) expr(n *Node) *ssa.Value {
                s.startBlock(bNext)
                var titer Iter
                fp := Structfirst(&titer, Getoutarg(n.Left.Type))
-               a := s.f.Entry.NewValue(ssa.OpSPAddr, Ptrto(fp.Type), fp.Width)
+               a := s.f.Entry.NewValue1(ssa.OpOffPtr, Ptrto(fp.Type), fp.Width, s.sp)
                return s.curBlock.NewValue2(ssa.OpLoad, fp.Type, nil, a, call)
        default:
                log.Fatalf("unhandled expr %s", opnames[n.Op])
@@ -367,8 +343,81 @@ func (s *ssaState) expr(n *Node) *ssa.Value {
        }
 }
 
+// addr converts the address of the expression n to SSA, adds it to s and returns the SSA result.
+func (s *state) addr(n *Node) *ssa.Value {
+       switch n.Op {
+       case ONAME:
+               if n.Class == PEXTERN {
+                       // global variable
+                       return s.f.Entry.NewValue(ssa.OpGlobal, Ptrto(n.Type), n.Sym)
+               }
+               if n.Class == PPARAMOUT {
+                       // store to parameter slot
+                       return s.f.Entry.NewValue1(ssa.OpOffPtr, Ptrto(n.Type), n.Xoffset, s.fp)
+               }
+               // TODO: address of locals
+               log.Fatalf("variable address of %v not implemented", n)
+               return nil
+       case OINDREG:
+               // indirect off a register (TODO: always SP?)
+               // used for storing/loading arguments/returns to/from callees
+               return s.f.Entry.NewValue1(ssa.OpOffPtr, Ptrto(n.Type), n.Xoffset, s.sp)
+       case OINDEX:
+               if n.Left.Type.Bound >= 0 { // array
+                       a := s.addr(n.Left)
+                       i := s.expr(n.Right)
+                       len := s.f.ConstInt(s.config.Uintptr, n.Left.Type.Bound)
+                       s.boundsCheck(i, len)
+                       return s.curBlock.NewValue2(ssa.OpPtrIndex, Ptrto(n.Left.Type.Type), nil, a, i)
+               } else { // slice
+                       a := s.expr(n.Left)
+                       i := s.expr(n.Right)
+                       len := s.curBlock.NewValue1(ssa.OpSliceLen, s.config.Uintptr, nil, a)
+                       s.boundsCheck(i, len)
+                       p := s.curBlock.NewValue1(ssa.OpSlicePtr, Ptrto(n.Left.Type.Type), nil, a)
+                       return s.curBlock.NewValue2(ssa.OpPtrIndex, Ptrto(n.Left.Type.Type), nil, p, i)
+               }
+       default:
+               log.Fatalf("addr: bad op %v", n.Op)
+               return nil
+       }
+}
+
+// nilCheck generates nil pointer checking code.
+// Starts a new block on return.
+func (s *state) nilCheck(ptr *ssa.Value) {
+       c := s.curBlock.NewValue1(ssa.OpIsNonNil, ssa.TypeBool, nil, ptr)
+       b := s.endBlock()
+       b.Kind = ssa.BlockIf
+       b.Control = c
+       bNext := s.f.NewBlock(ssa.BlockPlain)
+       addEdge(b, bNext)
+       addEdge(b, s.exit)
+       s.startBlock(bNext)
+       // TODO(khr): Don't go directly to exit.  Go to a stub that calls panicmem first.
+       // TODO: implicit nil checks somehow?
+}
+
+// boundsCheck generates bounds checking code.  Checks if 0 <= idx < len, branches to exit if not.
+// Starts a new block on return.
+func (s *state) boundsCheck(idx, len *ssa.Value) {
+       // TODO: convert index to full width?
+       // TODO: if index is 64-bit and we're compiling to 32-bit, check that high 32 bits are zero.
+
+       // bounds check
+       cmp := s.curBlock.NewValue2(ssa.OpIsInBounds, ssa.TypeBool, nil, idx, len)
+       b := s.endBlock()
+       b.Kind = ssa.BlockIf
+       b.Control = cmp
+       bNext := s.f.NewBlock(ssa.BlockPlain)
+       addEdge(b, bNext)
+       addEdge(b, s.exit)
+       // TODO: don't go directly to s.exit.  Go to a stub that calls panicindex first.
+       s.startBlock(bNext)
+}
+
 // variable returns the value of a variable at the current location.
-func (s *ssaState) variable(name string, t ssa.Type) *ssa.Value {
+func (s *state) variable(name string, t ssa.Type) *ssa.Value {
        if s.curBlock == nil {
                log.Fatalf("nil curblock!")
        }
@@ -381,11 +430,11 @@ func (s *ssaState) variable(name string, t ssa.Type) *ssa.Value {
        return v
 }
 
-func (s *ssaState) mem() *ssa.Value {
+func (s *state) mem() *ssa.Value {
        return s.variable(".mem", ssa.TypeMem)
 }
 
-func (s *ssaState) linkForwardReferences() {
+func (s *state) linkForwardReferences() {
        // Build ssa graph.  Each variable on its first use in a basic block
        // leaves a FwdRef in that block representing the incoming value
        // of that variable.  This function links that ref up with possible definitions,
@@ -406,17 +455,16 @@ func (s *ssaState) linkForwardReferences() {
 }
 
 // lookupVarIncoming finds the variable's value at the start of block b.
-func (s *ssaState) lookupVarIncoming(b *ssa.Block, t ssa.Type, name string) *ssa.Value {
+func (s *state) lookupVarIncoming(b *ssa.Block, t ssa.Type, name string) *ssa.Value {
        // TODO(khr): have lookupVarIncoming overwrite the fwdRef or copy it
        // will be used in, instead of having the result used in a copy value.
        if b == s.f.Entry {
                if name == ".mem" {
-                       return b.NewValue(ssa.OpArg, t, name)
+                       return s.startmem
                }
                // variable is live at the entry block.  Load it.
-               a := s.f.Entry.NewValue(ssa.OpFPAddr, Ptrto(t.(*Type)), s.argOffsets[name])
-               m := b.NewValue(ssa.OpArg, ssa.TypeMem, ".mem") // TODO: reuse mem starting value
-               return b.NewValue2(ssa.OpLoad, t, nil, a, m)
+               addr := s.f.Entry.NewValue1(ssa.OpOffPtr, Ptrto(t.(*Type)), s.argOffsets[name], s.fp)
+               return b.NewValue2(ssa.OpLoad, t, nil, addr, s.startmem)
        }
        var vals []*ssa.Value
        for _, p := range b.Preds {
@@ -435,7 +483,7 @@ func (s *ssaState) lookupVarIncoming(b *ssa.Block, t ssa.Type, name string) *ssa
 }
 
 // lookupVarOutgoing finds the variable's value at the end of block b.
-func (s *ssaState) lookupVarOutgoing(b *ssa.Block, t ssa.Type, name string) *ssa.Value {
+func (s *state) lookupVarOutgoing(b *ssa.Block, t ssa.Type, name string) *ssa.Value {
        m := s.defvars[b.ID]
        if v, ok := m[name]; ok {
                return v
@@ -568,13 +616,23 @@ func genValue(v *ssa.Value, frameSize int64) {
                p.To.Type = obj.TYPE_REG
                p.To.Reg = r
        case ssa.OpCMPQ:
-               x := regnum(v.Args[0])
-               y := regnum(v.Args[1])
                p := Prog(x86.ACMPQ)
                p.From.Type = obj.TYPE_REG
-               p.From.Reg = x
+               p.From.Reg = regnum(v.Args[0])
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = regnum(v.Args[1])
+       case ssa.OpCMPCQ:
+               p := Prog(x86.ACMPQ)
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = regnum(v.Args[0])
+               p.To.Type = obj.TYPE_CONST
+               p.To.Offset = v.Aux.(int64)
+       case ssa.OpTESTB:
+               p := Prog(x86.ATESTB)
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = regnum(v.Args[0])
                p.To.Type = obj.TYPE_REG
-               p.To.Reg = y
+               p.To.Reg = regnum(v.Args[1])
        case ssa.OpMOVQconst:
                x := regnum(v)
                p := Prog(x86.AMOVQ)
@@ -582,22 +640,57 @@ func genValue(v *ssa.Value, frameSize int64) {
                p.From.Offset = v.Aux.(int64)
                p.To.Type = obj.TYPE_REG
                p.To.Reg = x
-       case ssa.OpMOVQloadFP:
-               x := regnum(v)
+       case ssa.OpMOVQload:
                p := Prog(x86.AMOVQ)
                p.From.Type = obj.TYPE_MEM
-               p.From.Reg = x86.REG_SP
-               p.From.Offset = v.Aux.(int64) + frameSize
+               if v.Block.Func.RegAlloc[v.Args[0].ID].Name() == "FP" {
+                       // TODO: do the fp/sp adjustment somewhere else?
+                       p.From.Reg = x86.REG_SP
+                       p.From.Offset = v.Aux.(int64) + frameSize
+               } else {
+                       p.From.Reg = regnum(v.Args[0])
+                       p.From.Offset = v.Aux.(int64)
+               }
                p.To.Type = obj.TYPE_REG
-               p.To.Reg = x
-       case ssa.OpMOVQstoreFP:
-               x := regnum(v.Args[0])
+               p.To.Reg = regnum(v)
+       case ssa.OpMOVBload:
+               p := Prog(x86.AMOVB)
+               p.From.Type = obj.TYPE_MEM
+               if v.Block.Func.RegAlloc[v.Args[0].ID].Name() == "FP" {
+                       p.From.Reg = x86.REG_SP
+                       p.From.Offset = v.Aux.(int64) + frameSize
+               } else {
+                       p.From.Reg = regnum(v.Args[0])
+                       p.From.Offset = v.Aux.(int64)
+               }
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = regnum(v)
+       case ssa.OpMOVQloadidx8:
+               p := Prog(x86.AMOVQ)
+               p.From.Type = obj.TYPE_MEM
+               if v.Block.Func.RegAlloc[v.Args[0].ID].Name() == "FP" {
+                       p.From.Reg = x86.REG_SP
+                       p.From.Offset = v.Aux.(int64) + frameSize
+               } else {
+                       p.From.Reg = regnum(v.Args[0])
+                       p.From.Offset = v.Aux.(int64)
+               }
+               p.From.Scale = 8
+               p.From.Index = regnum(v.Args[1])
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = regnum(v)
+       case ssa.OpMOVQstore:
                p := Prog(x86.AMOVQ)
                p.From.Type = obj.TYPE_REG
-               p.From.Reg = x
+               p.From.Reg = regnum(v.Args[1])
                p.To.Type = obj.TYPE_MEM
-               p.To.Reg = x86.REG_SP
-               p.To.Offset = v.Aux.(int64) + frameSize
+               if v.Block.Func.RegAlloc[v.Args[0].ID].Name() == "FP" {
+                       p.To.Reg = x86.REG_SP
+                       p.To.Offset = v.Aux.(int64) + frameSize
+               } else {
+                       p.To.Reg = regnum(v.Args[0])
+                       p.To.Offset = v.Aux.(int64)
+               }
        case ssa.OpCopy:
                x := regnum(v.Args[0])
                y := regnum(v)
@@ -638,8 +731,19 @@ func genValue(v *ssa.Value, frameSize int64) {
        case ssa.OpArg:
                // memory arg needs no code
                // TODO: only mem arg goes here.
+       case ssa.OpLEAQglobal:
+               g := v.Aux.(ssa.GlobalOffset)
+               p := Prog(x86.ALEAQ)
+               p.From.Type = obj.TYPE_MEM
+               p.From.Name = obj.NAME_EXTERN
+               p.From.Sym = Linksym(g.Global.(*Sym))
+               p.From.Offset = g.Offset
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = regnum(v)
+       case ssa.OpFP, ssa.OpSP:
+               // nothing to do
        default:
-               log.Fatalf("value %v not implemented yet", v)
+               log.Fatalf("value %s not implemented yet", v.LongString())
        }
 }
 
@@ -653,6 +757,40 @@ func genBlock(b, next *ssa.Block, branches []branch) []branch {
                }
        case ssa.BlockExit:
                Prog(obj.ARET)
+       case ssa.BlockEQ:
+               if b.Succs[0] == next {
+                       p := Prog(x86.AJNE)
+                       p.To.Type = obj.TYPE_BRANCH
+                       branches = append(branches, branch{p, b.Succs[1]})
+               } else if b.Succs[1] == next {
+                       p := Prog(x86.AJEQ)
+                       p.To.Type = obj.TYPE_BRANCH
+                       branches = append(branches, branch{p, b.Succs[0]})
+               } else {
+                       p := Prog(x86.AJEQ)
+                       p.To.Type = obj.TYPE_BRANCH
+                       branches = append(branches, branch{p, b.Succs[0]})
+                       q := Prog(obj.AJMP)
+                       q.To.Type = obj.TYPE_BRANCH
+                       branches = append(branches, branch{q, b.Succs[1]})
+               }
+       case ssa.BlockNE:
+               if b.Succs[0] == next {
+                       p := Prog(x86.AJEQ)
+                       p.To.Type = obj.TYPE_BRANCH
+                       branches = append(branches, branch{p, b.Succs[1]})
+               } else if b.Succs[1] == next {
+                       p := Prog(x86.AJNE)
+                       p.To.Type = obj.TYPE_BRANCH
+                       branches = append(branches, branch{p, b.Succs[0]})
+               } else {
+                       p := Prog(x86.AJNE)
+                       p.To.Type = obj.TYPE_BRANCH
+                       branches = append(branches, branch{p, b.Succs[0]})
+                       q := Prog(obj.AJMP)
+                       q.To.Type = obj.TYPE_BRANCH
+                       branches = append(branches, branch{q, b.Succs[1]})
+               }
        case ssa.BlockLT:
                if b.Succs[0] == next {
                        p := Prog(x86.AJGE)
@@ -670,8 +808,43 @@ func genBlock(b, next *ssa.Block, branches []branch) []branch {
                        q.To.Type = obj.TYPE_BRANCH
                        branches = append(branches, branch{q, b.Succs[1]})
                }
+       case ssa.BlockULT:
+               if b.Succs[0] == next {
+                       p := Prog(x86.AJCC)
+                       p.To.Type = obj.TYPE_BRANCH
+                       branches = append(branches, branch{p, b.Succs[1]})
+               } else if b.Succs[1] == next {
+                       p := Prog(x86.AJCS)
+                       p.To.Type = obj.TYPE_BRANCH
+                       branches = append(branches, branch{p, b.Succs[0]})
+               } else {
+                       p := Prog(x86.AJCS)
+                       p.To.Type = obj.TYPE_BRANCH
+                       branches = append(branches, branch{p, b.Succs[0]})
+                       q := Prog(obj.AJMP)
+                       q.To.Type = obj.TYPE_BRANCH
+                       branches = append(branches, branch{q, b.Succs[1]})
+               }
+       case ssa.BlockUGT:
+               if b.Succs[0] == next {
+                       p := Prog(x86.AJLS)
+                       p.To.Type = obj.TYPE_BRANCH
+                       branches = append(branches, branch{p, b.Succs[1]})
+               } else if b.Succs[1] == next {
+                       p := Prog(x86.AJHI)
+                       p.To.Type = obj.TYPE_BRANCH
+                       branches = append(branches, branch{p, b.Succs[0]})
+               } else {
+                       p := Prog(x86.AJHI)
+                       p.To.Type = obj.TYPE_BRANCH
+                       branches = append(branches, branch{p, b.Succs[0]})
+                       q := Prog(obj.AJMP)
+                       q.To.Type = obj.TYPE_BRANCH
+                       branches = append(branches, branch{q, b.Succs[1]})
+               }
+
        default:
-               log.Fatalf("branch at %v not implemented yet", b)
+               log.Fatalf("branch %s not implemented yet", b.LongString())
        }
        return branches
 }
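
Each conditional block kind in genBlock above repeats the same three-way
pattern.  A condensed sketch (a hypothetical helper, not in the patch) of
that shared logic, parameterized by the jump opcode and its inverse:

	// genCondBranch is a sketch of the pattern shared by the BlockEQ, BlockNE,
	// BlockLT, BlockULT, and BlockUGT cases above.  jmp is the jump taken to
	// Succs[0]; inv is its inverse.
	func genCondBranch(b, next *ssa.Block, jmp, inv int, branches []branch) []branch {
		switch {
		case b.Succs[0] == next: // fall through to the taken side: invert the jump
			p := Prog(inv)
			p.To.Type = obj.TYPE_BRANCH
			branches = append(branches, branch{p, b.Succs[1]})
		case b.Succs[1] == next: // fall through to the not-taken side
			p := Prog(jmp)
			p.To.Type = obj.TYPE_BRANCH
			branches = append(branches, branch{p, b.Succs[0]})
		default: // no fallthrough: conditional jump plus unconditional JMP
			p := Prog(jmp)
			p.To.Type = obj.TYPE_BRANCH
			branches = append(branches, branch{p, b.Succs[0]})
			q := Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			branches = append(branches, branch{q, b.Succs[1]})
		}
		return branches
	}

With it, the BlockULT case would reduce to
branches = genCondBranch(b, next, x86.AJCS, x86.AJCC, branches).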
src/cmd/internal/obj/x86/6.out.go
index c7f46e1801c7ffcb7d4d355eeb94e18cf89c836c..e36cb9e7a3c69ca8acf84ef526babf1478b66261 100644 (file)
@@ -110,23 +110,23 @@ const (
        AINTO
        AIRETL
        AIRETW
-       AJCC
-       AJCS
+       AJCC // >= unsigned
+       AJCS // < unsigned
        AJCXZL
-       AJEQ
-       AJGE
-       AJGT
-       AJHI
-       AJLE
-       AJLS
-       AJLT
-       AJMI
-       AJNE
-       AJOC
-       AJOS
-       AJPC
-       AJPL
-       AJPS
+       AJEQ // == (zero)
+       AJGE // >= signed
+       AJGT // > signed
+       AJHI // > unsigned
+       AJLE // <= signed
+       AJLS // <= unsigned
+       AJLT // < signed
+       AJMI // sign bit set (negative)
+       AJNE // != (nonzero)
+       AJOC // overflow clear
+       AJOS // overflow set
+       AJPC // parity clear
+       AJPL // sign bit clear (positive)
+       AJPS // parity set
        ALAHF
        ALARL
        ALARW
src/cmd/internal/ssa/check.go
index 453388a89972bc0ff999df196bd97a3aa03ccddb..667313ad9f825e34a9417f6fd5329b2a7e95b2ca 100644 (file)
@@ -58,7 +58,7 @@ func checkFunc(f *Func) {
                        if b.Control == nil {
                                log.Panicf("exit block %s has no control value", b)
                        }
-                       if b.Control.Type != TypeMem {
+                       if !b.Control.Type.IsMemory() {
                                log.Panicf("exit block %s has non-memory control value %s", b, b.Control.LongString())
                        }
                case BlockPlain:
@@ -75,7 +75,7 @@ func checkFunc(f *Func) {
                        if b.Control == nil {
                                log.Panicf("if block %s has no control value", b)
                        }
-                       if b.Control.Type != TypeBool {
+                       if !b.Control.Type.IsBoolean() {
                                log.Panicf("if block %s has non-bool control value %s", b, b.Control.LongString())
                        }
                case BlockCall:
@@ -85,7 +85,7 @@ func checkFunc(f *Func) {
                        if b.Control == nil {
                                log.Panicf("call block %s has no control value", b)
                        }
-                       if b.Control.Type != TypeMem {
+                       if !b.Control.Type.IsMemory() {
                                log.Panicf("call block %s has non-memory control value %s", b, b.Control.LongString())
                        }
                        if b.Succs[1].Kind != BlockExit {
src/cmd/internal/ssa/config.go
index 80acda4b23966f701bc9ea3565ab7ecdb6978a51..9f1d2a8593ae39dca7da8753ce4dfaf7a46116ad 100644 (file)
@@ -9,7 +9,7 @@ import "log"
 type Config struct {
        arch    string            // "amd64", etc.
        ptrSize int64             // 4 or 8
-       UIntPtr Type              // pointer arithmetic type
+       Uintptr Type              // pointer arithmetic type
        lower   func(*Value) bool // lowering function
 
        // TODO: more stuff.  Compiler flags of interest, ...
@@ -30,9 +30,9 @@ func NewConfig(arch string) *Config {
        }
 
        // cache the intptr type in the config
-       c.UIntPtr = TypeUInt32
+       c.Uintptr = TypeUInt32
        if c.ptrSize == 8 {
-               c.UIntPtr = TypeUInt64
+               c.Uintptr = TypeUInt64
        }
 
        return c
src/cmd/internal/ssa/generic.go
index 2a96793c6175d319655783483b212fbbf419740c..91f9c17d1139598a02b24f8063f087cbf676e745 100644 (file)
@@ -6,20 +6,20 @@ func genericRules(v *Value) bool {
        switch v.Op {
        case OpAdd:
                // match: (Add <t> (Const [c]) (Const [d]))
-               // cond: is64BitInt(t) && isSigned(t)
+               // cond: is64BitInt(t)
                // result: (Const [{c.(int64)+d.(int64)}])
                {
                        t := v.Type
                        if v.Args[0].Op != OpConst {
-                               goto endc86f5c160a87f6f5ec90b6551ec099d9
+                               goto end8d047ed0ae9537b840adc79ea82c6e05
                        }
                        c := v.Args[0].Aux
                        if v.Args[1].Op != OpConst {
-                               goto endc86f5c160a87f6f5ec90b6551ec099d9
+                               goto end8d047ed0ae9537b840adc79ea82c6e05
                        }
                        d := v.Args[1].Aux
-                       if !(is64BitInt(t) && isSigned(t)) {
-                               goto endc86f5c160a87f6f5ec90b6551ec099d9
+                       if !(is64BitInt(t)) {
+                               goto end8d047ed0ae9537b840adc79ea82c6e05
                        }
                        v.Op = OpConst
                        v.Aux = nil
@@ -27,100 +27,141 @@ func genericRules(v *Value) bool {
                        v.Aux = c.(int64) + d.(int64)
                        return true
                }
-               goto endc86f5c160a87f6f5ec90b6551ec099d9
-       endc86f5c160a87f6f5ec90b6551ec099d9:
+               goto end8d047ed0ae9537b840adc79ea82c6e05
+       end8d047ed0ae9537b840adc79ea82c6e05:
                ;
-               // match: (Add <t> (Const [c]) (Const [d]))
-               // cond: is64BitInt(t) && !isSigned(t)
-               // result: (Const [{c.(uint64)+d.(uint64)}])
+       case OpArrayIndex:
+               // match: (ArrayIndex (Load ptr mem) idx)
+               // cond:
+               // result: (Load (PtrIndex <ptr.Type.Elem().Elem().PtrTo()> ptr idx) mem)
+               {
+                       if v.Args[0].Op != OpLoad {
+                               goto end3809f4c52270a76313e4ea26e6f0b753
+                       }
+                       ptr := v.Args[0].Args[0]
+                       mem := v.Args[0].Args[1]
+                       idx := v.Args[1]
+                       v.Op = OpLoad
+                       v.Aux = nil
+                       v.resetArgs()
+                       v0 := v.Block.NewValue(OpPtrIndex, TypeInvalid, nil)
+                       v0.Type = ptr.Type.Elem().Elem().PtrTo()
+                       v0.AddArg(ptr)
+                       v0.AddArg(idx)
+                       v.AddArg(v0)
+                       v.AddArg(mem)
+                       return true
+               }
+               goto end3809f4c52270a76313e4ea26e6f0b753
+       end3809f4c52270a76313e4ea26e6f0b753:
+               ;
+       case OpIsInBounds:
+               // match: (IsInBounds (Const [c]) (Const [d]))
+               // cond:
+               // result: (Const [inBounds(c.(int64),d.(int64))])
+               {
+                       if v.Args[0].Op != OpConst {
+                               goto enddbd1a394d9b71ee64335361b8384865c
+                       }
+                       c := v.Args[0].Aux
+                       if v.Args[1].Op != OpConst {
+                               goto enddbd1a394d9b71ee64335361b8384865c
+                       }
+                       d := v.Args[1].Aux
+                       v.Op = OpConst
+                       v.Aux = nil
+                       v.resetArgs()
+                       v.Aux = inBounds(c.(int64), d.(int64))
+                       return true
+               }
+               goto enddbd1a394d9b71ee64335361b8384865c
+       enddbd1a394d9b71ee64335361b8384865c:
+               ;
+       case OpMul:
+               // match: (Mul <t> (Const [c]) (Const [d]))
+               // cond: is64BitInt(t)
+               // result: (Const [{c.(int64)*d.(int64)}])
                {
                        t := v.Type
                        if v.Args[0].Op != OpConst {
-                               goto end8941c2a515c1bd38530b7fd96862bac4
+                               goto end776610f88cf04f438242d76ed2b14f1c
                        }
                        c := v.Args[0].Aux
                        if v.Args[1].Op != OpConst {
-                               goto end8941c2a515c1bd38530b7fd96862bac4
+                               goto end776610f88cf04f438242d76ed2b14f1c
                        }
                        d := v.Args[1].Aux
-                       if !(is64BitInt(t) && !isSigned(t)) {
-                               goto end8941c2a515c1bd38530b7fd96862bac4
+                       if !(is64BitInt(t)) {
+                               goto end776610f88cf04f438242d76ed2b14f1c
                        }
                        v.Op = OpConst
                        v.Aux = nil
                        v.resetArgs()
-                       v.Aux = c.(uint64) + d.(uint64)
+                       v.Aux = c.(int64) * d.(int64)
                        return true
                }
-               goto end8941c2a515c1bd38530b7fd96862bac4
-       end8941c2a515c1bd38530b7fd96862bac4:
+               goto end776610f88cf04f438242d76ed2b14f1c
+       end776610f88cf04f438242d76ed2b14f1c:
                ;
-       case OpSliceCap:
-               // match: (SliceCap (Load ptr mem))
+       case OpPtrIndex:
+               // match: (PtrIndex <t> ptr idx)
                // cond:
-               // result: (Load (Add <ptr.Type> ptr (Const <v.Block.Func.Config.UIntPtr> [int64(v.Block.Func.Config.ptrSize*2)])) mem)
+               // result: (Add ptr (Mul <v.Block.Func.Config.Uintptr> idx (Const <v.Block.Func.Config.Uintptr> [t.Elem().Size()])))
                {
-                       if v.Args[0].Op != OpLoad {
-                               goto ende03f9b79848867df439b56889bb4e55d
-                       }
-                       ptr := v.Args[0].Args[0]
-                       mem := v.Args[0].Args[1]
-                       v.Op = OpLoad
+                       t := v.Type
+                       ptr := v.Args[0]
+                       idx := v.Args[1]
+                       v.Op = OpAdd
                        v.Aux = nil
                        v.resetArgs()
-                       v0 := v.Block.NewValue(OpAdd, TypeInvalid, nil)
-                       v0.Type = ptr.Type
-                       v0.AddArg(ptr)
+                       v.AddArg(ptr)
+                       v0 := v.Block.NewValue(OpMul, TypeInvalid, nil)
+                       v0.Type = v.Block.Func.Config.Uintptr
+                       v0.AddArg(idx)
                        v1 := v.Block.NewValue(OpConst, TypeInvalid, nil)
-                       v1.Type = v.Block.Func.Config.UIntPtr
-                       v1.Aux = int64(v.Block.Func.Config.ptrSize * 2)
+                       v1.Type = v.Block.Func.Config.Uintptr
+                       v1.Aux = t.Elem().Size()
                        v0.AddArg(v1)
                        v.AddArg(v0)
-                       v.AddArg(mem)
                        return true
                }
-               goto ende03f9b79848867df439b56889bb4e55d
-       ende03f9b79848867df439b56889bb4e55d:
+               goto end383c68c41e72d22ef00c4b7b0fddcbb8
+       end383c68c41e72d22ef00c4b7b0fddcbb8:
                ;
-       case OpSliceIndex:
-               // match: (SliceIndex s i mem)
+       case OpSliceCap:
+               // match: (SliceCap (Load ptr mem))
                // cond:
-               // result: (Load (Add <s.Type.Elem().PtrTo()> (SlicePtr <s.Type.Elem().PtrTo()> s) (Mul <v.Block.Func.Config.UIntPtr> i (Const <v.Block.Func.Config.UIntPtr> [s.Type.Elem().Size()]))) mem)
+               // result: (Load (Add <ptr.Type> ptr (Const <v.Block.Func.Config.Uintptr> [int64(v.Block.Func.Config.ptrSize*2)])) mem)
                {
-                       s := v.Args[0]
-                       i := v.Args[1]
-                       mem := v.Args[2]
+                       if v.Args[0].Op != OpLoad {
+                               goto endbf1d4db93c4664ed43be3f73afb4dfa3
+                       }
+                       ptr := v.Args[0].Args[0]
+                       mem := v.Args[0].Args[1]
                        v.Op = OpLoad
                        v.Aux = nil
                        v.resetArgs()
                        v0 := v.Block.NewValue(OpAdd, TypeInvalid, nil)
-                       v0.Type = s.Type.Elem().PtrTo()
-                       v1 := v.Block.NewValue(OpSlicePtr, TypeInvalid, nil)
-                       v1.Type = s.Type.Elem().PtrTo()
-                       v1.AddArg(s)
+                       v0.Type = ptr.Type
+                       v0.AddArg(ptr)
+                       v1 := v.Block.NewValue(OpConst, TypeInvalid, nil)
+                       v1.Type = v.Block.Func.Config.Uintptr
+                       v1.Aux = int64(v.Block.Func.Config.ptrSize * 2)
                        v0.AddArg(v1)
-                       v2 := v.Block.NewValue(OpMul, TypeInvalid, nil)
-                       v2.Type = v.Block.Func.Config.UIntPtr
-                       v2.AddArg(i)
-                       v3 := v.Block.NewValue(OpConst, TypeInvalid, nil)
-                       v3.Type = v.Block.Func.Config.UIntPtr
-                       v3.Aux = s.Type.Elem().Size()
-                       v2.AddArg(v3)
-                       v0.AddArg(v2)
                        v.AddArg(v0)
                        v.AddArg(mem)
                        return true
                }
-               goto end733704831a61760840348f790b3ab045
-       end733704831a61760840348f790b3ab045:
+               goto endbf1d4db93c4664ed43be3f73afb4dfa3
+       endbf1d4db93c4664ed43be3f73afb4dfa3:
                ;
        case OpSliceLen:
                // match: (SliceLen (Load ptr mem))
                // cond:
-               // result: (Load (Add <ptr.Type> ptr (Const <v.Block.Func.Config.UIntPtr> [int64(v.Block.Func.Config.ptrSize)])) mem)
+               // result: (Load (Add <ptr.Type> ptr (Const <v.Block.Func.Config.Uintptr> [int64(v.Block.Func.Config.ptrSize)])) mem)
                {
                        if v.Args[0].Op != OpLoad {
-                               goto ende94950a57eca1871c93afdeaadb90223
+                               goto end9190b1ecbda4c5dd6d3e05d2495fb297
                        }
                        ptr := v.Args[0].Args[0]
                        mem := v.Args[0].Args[1]
@@ -131,15 +172,15 @@ func genericRules(v *Value) bool {
                        v0.Type = ptr.Type
                        v0.AddArg(ptr)
                        v1 := v.Block.NewValue(OpConst, TypeInvalid, nil)
-                       v1.Type = v.Block.Func.Config.UIntPtr
+                       v1.Type = v.Block.Func.Config.Uintptr
                        v1.Aux = int64(v.Block.Func.Config.ptrSize)
                        v0.AddArg(v1)
                        v.AddArg(v0)
                        v.AddArg(mem)
                        return true
                }
-               goto ende94950a57eca1871c93afdeaadb90223
-       ende94950a57eca1871c93afdeaadb90223:
+               goto end9190b1ecbda4c5dd6d3e05d2495fb297
+       end9190b1ecbda4c5dd6d3e05d2495fb297:
                ;
        case OpSlicePtr:
                // match: (SlicePtr (Load ptr mem))
@@ -160,6 +201,36 @@ func genericRules(v *Value) bool {
                }
                goto end459613b83f95b65729d45c2ed663a153
        end459613b83f95b65729d45c2ed663a153:
+               ;
+       case OpStore:
+               // match: (Store dst (Load <t> src mem) mem)
+               // cond: t.Size() > 8
+               // result: (Move [t.Size()] dst src mem)
+               {
+                       dst := v.Args[0]
+                       if v.Args[1].Op != OpLoad {
+                               goto end324ffb6d2771808da4267f62c854e9c8
+                       }
+                       t := v.Args[1].Type
+                       src := v.Args[1].Args[0]
+                       mem := v.Args[1].Args[1]
+                       if v.Args[2] != v.Args[1].Args[1] {
+                               goto end324ffb6d2771808da4267f62c854e9c8
+                       }
+                       if !(t.Size() > 8) {
+                               goto end324ffb6d2771808da4267f62c854e9c8
+                       }
+                       v.Op = OpMove
+                       v.Aux = nil
+                       v.resetArgs()
+                       v.Aux = t.Size()
+                       v.AddArg(dst)
+                       v.AddArg(src)
+                       v.AddArg(mem)
+                       return true
+               }
+               goto end324ffb6d2771808da4267f62c854e9c8
+       end324ffb6d2771808da4267f62c854e9c8:
        }
        return false
 }
src/cmd/internal/ssa/lower.go
index 82e5d23241da343cb79fc69c68f9659cb8f5e7c0..84379c00decc5ff280700de9aa622aecba68d868 100644 (file)
@@ -16,41 +16,88 @@ func lower(f *Func) {
        // additional pass for 386/amd64, link condition codes directly to blocks
        // TODO: do generically somehow?  Special "block" rewrite rules?
        for _, b := range f.Blocks {
-               switch b.Kind {
-               case BlockIf:
-                       switch b.Control.Op {
-                       case OpSETL:
-                               b.Kind = BlockLT
-                               b.Control = b.Control.Args[0]
-                       case OpSETNE:
-                               b.Kind = BlockNE
-                               b.Control = b.Control.Args[0]
-                       case OpSETB:
-                               b.Kind = BlockULT
-                               b.Control = b.Control.Args[0]
-                               // TODO: others
+               for {
+                       switch b.Kind {
+                       case BlockIf:
+                               switch b.Control.Op {
+                               case OpSETL:
+                                       b.Kind = BlockLT
+                                       b.Control = b.Control.Args[0]
+                                       continue
+                               case OpSETNE:
+                                       b.Kind = BlockNE
+                                       b.Control = b.Control.Args[0]
+                                       continue
+                               case OpSETB:
+                                       b.Kind = BlockULT
+                                       b.Control = b.Control.Args[0]
+                                       continue
+                               case OpMOVBload:
+                                       b.Kind = BlockNE
+                                       b.Control = b.NewValue2(OpTESTB, TypeFlags, nil, b.Control, b.Control)
+                                       continue
+                                       // TODO: others
+                               }
+                       case BlockLT:
+                               if b.Control.Op == OpInvertFlags {
+                                       b.Kind = BlockGT
+                                       b.Control = b.Control.Args[0]
+                                       continue
+                               }
+                       case BlockGT:
+                               if b.Control.Op == OpInvertFlags {
+                                       b.Kind = BlockLT
+                                       b.Control = b.Control.Args[0]
+                                       continue
+                               }
+                       case BlockLE:
+                               if b.Control.Op == OpInvertFlags {
+                                       b.Kind = BlockGE
+                                       b.Control = b.Control.Args[0]
+                                       continue
+                               }
+                       case BlockGE:
+                               if b.Control.Op == OpInvertFlags {
+                                       b.Kind = BlockLE
+                                       b.Control = b.Control.Args[0]
+                                       continue
+                               }
+                       case BlockULT:
+                               if b.Control.Op == OpInvertFlags {
+                                       b.Kind = BlockUGT
+                                       b.Control = b.Control.Args[0]
+                                       continue
+                               }
+                       case BlockUGT:
+                               if b.Control.Op == OpInvertFlags {
+                                       b.Kind = BlockULT
+                                       b.Control = b.Control.Args[0]
+                                       continue
+                               }
+                       case BlockULE:
+                               if b.Control.Op == OpInvertFlags {
+                                       b.Kind = BlockUGE
+                                       b.Control = b.Control.Args[0]
+                                       continue
+                               }
+                       case BlockUGE:
+                               if b.Control.Op == OpInvertFlags {
+                                       b.Kind = BlockULE
+                                       b.Control = b.Control.Args[0]
+                                       continue
+                               }
+                       case BlockEQ:
+                               if b.Control.Op == OpInvertFlags {
+                                       b.Control = b.Control.Args[0]
+                                       continue
+                               }
+                       case BlockNE:
+                               if b.Control.Op == OpInvertFlags {
+                                       b.Control = b.Control.Args[0]
+                                       continue
+                               }
                        }
-               case BlockLT:
-                       if b.Control.Op == OpInvertFlags {
-                               b.Kind = BlockGE
-                               b.Control = b.Control.Args[0]
-                       }
-               case BlockULT:
-                       if b.Control.Op == OpInvertFlags {
-                               b.Kind = BlockUGE
-                               b.Control = b.Control.Args[0]
-                       }
-               case BlockEQ:
-                       if b.Control.Op == OpInvertFlags {
-                               b.Kind = BlockNE
-                               b.Control = b.Control.Args[0]
-                       }
-               case BlockNE:
-                       if b.Control.Op == OpInvertFlags {
-                               b.Kind = BlockEQ
-                               b.Control = b.Control.Args[0]
-                       }
-                       // TODO: others
+                       break
                }
        }
 }
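
The lowering switch is now wrapped in a for loop so that block rewrites can
chain until a fixed point.  A worked example (a sketch, in the rules notation
used elsewhere in this CL): an If block whose SETB control consumes inverted
flags rewrites in two steps, one per loop iteration:

	If (SETB (InvertFlags cmp))
	-> ULT (InvertFlags cmp)   // BlockIf case, control op SETB
	-> UGT cmp                 // BlockULT case, control op InvertFlags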
src/cmd/internal/ssa/lowerAmd64.go
index ef891c37d9dbeff6c375ff095184ed70919155aa..356f646dcc07efdd6ac27b33e33516271e765e6e 100644 (file)
@@ -7,11 +7,11 @@ func lowerAmd64(v *Value) bool {
        case OpADDCQ:
                // match: (ADDCQ [c] (LEAQ8 [d] x y))
                // cond:
-               // result: (LEAQ8 [c.(int64)+d.(int64)] x y)
+               // result: (LEAQ8 [addOff(c, d)] x y)
                {
                        c := v.Aux
                        if v.Args[0].Op != OpLEAQ8 {
-                               goto end16348939e556e99e8447227ecb986f01
+                               goto end3bc1457811adc0cb81ad6b88a7461c60
                        }
                        d := v.Args[0].Aux
                        x := v.Args[0].Args[0]
@@ -19,58 +19,40 @@ func lowerAmd64(v *Value) bool {
                        v.Op = OpLEAQ8
                        v.Aux = nil
                        v.resetArgs()
-                       v.Aux = c.(int64) + d.(int64)
+                       v.Aux = addOff(c, d)
                        v.AddArg(x)
                        v.AddArg(y)
                        return true
                }
-               goto end16348939e556e99e8447227ecb986f01
-       end16348939e556e99e8447227ecb986f01:
+               goto end3bc1457811adc0cb81ad6b88a7461c60
+       end3bc1457811adc0cb81ad6b88a7461c60:
                ;
-               // match: (ADDCQ [off1] (FPAddr [off2]))
-               // cond:
-               // result: (FPAddr [off1.(int64)+off2.(int64)])
-               {
-                       off1 := v.Aux
-                       if v.Args[0].Op != OpFPAddr {
-                               goto end28e093ab0618066e6b2609db7aaf309b
-                       }
-                       off2 := v.Args[0].Aux
-                       v.Op = OpFPAddr
-                       v.Aux = nil
-                       v.resetArgs()
-                       v.Aux = off1.(int64) + off2.(int64)
-                       return true
-               }
-               goto end28e093ab0618066e6b2609db7aaf309b
-       end28e093ab0618066e6b2609db7aaf309b:
-               ;
-               // match: (ADDCQ [off1] (SPAddr [off2]))
-               // cond:
-               // result: (SPAddr [off1.(int64)+off2.(int64)])
+               // match: (ADDCQ [off] x)
+               // cond: off.(int64) == 0
+               // result: (Copy x)
                {
-                       off1 := v.Aux
-                       if v.Args[0].Op != OpSPAddr {
-                               goto endd0c27c62d150b88168075c5ba113d1fa
+                       off := v.Aux
+                       x := v.Args[0]
+                       if !(off.(int64) == 0) {
+                               goto end6710a6679c47b70577ecea7ad00dae87
                        }
-                       off2 := v.Args[0].Aux
-                       v.Op = OpSPAddr
+                       v.Op = OpCopy
                        v.Aux = nil
                        v.resetArgs()
-                       v.Aux = off1.(int64) + off2.(int64)
+                       v.AddArg(x)
                        return true
                }
-               goto endd0c27c62d150b88168075c5ba113d1fa
-       endd0c27c62d150b88168075c5ba113d1fa:
+               goto end6710a6679c47b70577ecea7ad00dae87
+       end6710a6679c47b70577ecea7ad00dae87:
                ;
        case OpADDQ:
-               // match: (ADDQ x (Const [c]))
+               // match: (ADDQ x (MOVQconst [c]))
                // cond:
                // result: (ADDCQ [c] x)
                {
                        x := v.Args[0]
-                       if v.Args[1].Op != OpConst {
-                               goto endef6908cfdf56e102cc327a3ddc14393d
+                       if v.Args[1].Op != OpMOVQconst {
+                               goto end39b79e84f20a6d44b5c4136aae220ac2
                        }
                        c := v.Args[1].Aux
                        v.Op = OpADDCQ
@@ -80,15 +62,15 @@ func lowerAmd64(v *Value) bool {
                        v.AddArg(x)
                        return true
                }
-               goto endef6908cfdf56e102cc327a3ddc14393d
-       endef6908cfdf56e102cc327a3ddc14393d:
+               goto end39b79e84f20a6d44b5c4136aae220ac2
+       end39b79e84f20a6d44b5c4136aae220ac2:
                ;
-               // match: (ADDQ (Const [c]) x)
+               // match: (ADDQ (MOVQconst [c]) x)
                // cond:
                // result: (ADDCQ [c] x)
                {
-                       if v.Args[0].Op != OpConst {
-                               goto endb54a32cf3147f424f08b46db62c69b23
+                       if v.Args[0].Op != OpMOVQconst {
+                               goto endc05ff5a2a132241b69d00c852001d820
                        }
                        c := v.Args[0].Aux
                        x := v.Args[1]
@@ -99,8 +81,8 @@ func lowerAmd64(v *Value) bool {
                        v.AddArg(x)
                        return true
                }
-               goto endb54a32cf3147f424f08b46db62c69b23
-       endb54a32cf3147f424f08b46db62c69b23:
+               goto endc05ff5a2a132241b69d00c852001d820
+       endc05ff5a2a132241b69d00c852001d820:
                ;
                // match: (ADDQ x (SHLCQ [shift] y))
                // cond: shift.(int64) == 3
@@ -168,13 +150,13 @@ func lowerAmd64(v *Value) bool {
        end35a02a1587264e40cf1055856ff8445a:
                ;
        case OpCMPQ:
-               // match: (CMPQ x (Const [c]))
+               // match: (CMPQ x (MOVQconst [c]))
                // cond:
                // result: (CMPCQ x [c])
                {
                        x := v.Args[0]
-                       if v.Args[1].Op != OpConst {
-                               goto end1770a40e4253d9f669559a360514613e
+                       if v.Args[1].Op != OpMOVQconst {
+                               goto endf180bae15b3d24c0213520d7f7aa98b4
                        }
                        c := v.Args[1].Aux
                        v.Op = OpCMPCQ
@@ -184,15 +166,15 @@ func lowerAmd64(v *Value) bool {
                        v.Aux = c
                        return true
                }
-               goto end1770a40e4253d9f669559a360514613e
-       end1770a40e4253d9f669559a360514613e:
+               goto endf180bae15b3d24c0213520d7f7aa98b4
+       endf180bae15b3d24c0213520d7f7aa98b4:
                ;
-               // match: (CMPQ (Const [c]) x)
+               // match: (CMPQ (MOVQconst [c]) x)
                // cond:
                // result: (InvertFlags (CMPCQ <TypeFlags> x [c]))
                {
-                       if v.Args[0].Op != OpConst {
-                               goto enda4e64c7eaeda16c1c0db9dac409cd126
+                       if v.Args[0].Op != OpMOVQconst {
+                               goto end8fc58bffa73b3df80b3de72c91844884
                        }
                        c := v.Args[0].Aux
                        x := v.Args[1]
@@ -206,8 +188,42 @@ func lowerAmd64(v *Value) bool {
                        v.AddArg(v0)
                        return true
                }
-               goto enda4e64c7eaeda16c1c0db9dac409cd126
-       enda4e64c7eaeda16c1c0db9dac409cd126:
+               goto end8fc58bffa73b3df80b3de72c91844884
+       end8fc58bffa73b3df80b3de72c91844884:
+               ;
+       case OpConst:
+               // match: (Const <t> [val])
+               // cond: is64BitInt(t)
+               // result: (MOVQconst [val])
+               {
+                       t := v.Type
+                       val := v.Aux
+                       if !(is64BitInt(t)) {
+                               goto end7f5c5b34093fbc6860524cb803ee51bf
+                       }
+                       v.Op = OpMOVQconst
+                       v.Aux = nil
+                       v.resetArgs()
+                       v.Aux = val
+                       return true
+               }
+               goto end7f5c5b34093fbc6860524cb803ee51bf
+       end7f5c5b34093fbc6860524cb803ee51bf:
+               ;
+       case OpGlobal:
+               // match: (Global [sym])
+               // cond:
+               // result: (LEAQglobal [GlobalOffset{sym,0}])
+               {
+                       sym := v.Aux
+                       v.Op = OpLEAQglobal
+                       v.Aux = nil
+                       v.resetArgs()
+                       v.Aux = GlobalOffset{sym, 0}
+                       return true
+               }
+               goto end3a3c76fac0e2e53c0e1c60b9524e6f1c
+       end3a3c76fac0e2e53c0e1c60b9524e6f1c:
                ;
        case OpIsInBounds:
                // match: (IsInBounds idx len)
@@ -273,16 +289,16 @@ func lowerAmd64(v *Value) bool {
                ;
        case OpLoad:
                // match: (Load <t> ptr mem)
-               // cond: (is64BitInt(t) || isPtr(t))
-               // result: (MOVQload [int64(0)] ptr mem)
+               // cond: t.IsBoolean()
+               // result: (MOVBload [int64(0)] ptr mem)
                {
                        t := v.Type
                        ptr := v.Args[0]
                        mem := v.Args[1]
-                       if !(is64BitInt(t) || isPtr(t)) {
-                               goto end581ce5a20901df1b8143448ba031685b
+                       if !(t.IsBoolean()) {
+                               goto end73f21632e56c3614902d3c29c82dc4ea
                        }
-                       v.Op = OpMOVQload
+                       v.Op = OpMOVBload
                        v.Aux = nil
                        v.resetArgs()
                        v.Aux = int64(0)
@@ -290,77 +306,38 @@ func lowerAmd64(v *Value) bool {
                        v.AddArg(mem)
                        return true
                }
-               goto end581ce5a20901df1b8143448ba031685b
-       end581ce5a20901df1b8143448ba031685b:
-               ;
-       case OpMOVQload:
-               // match: (MOVQload [off1] (FPAddr [off2]) mem)
-               // cond:
-               // result: (MOVQloadFP [off1.(int64)+off2.(int64)] mem)
-               {
-                       off1 := v.Aux
-                       if v.Args[0].Op != OpFPAddr {
-                               goto endce972b1aa84b56447978c43def87fa57
-                       }
-                       off2 := v.Args[0].Aux
-                       mem := v.Args[1]
-                       v.Op = OpMOVQloadFP
-                       v.Aux = nil
-                       v.resetArgs()
-                       v.Aux = off1.(int64) + off2.(int64)
-                       v.AddArg(mem)
-                       return true
-               }
-               goto endce972b1aa84b56447978c43def87fa57
-       endce972b1aa84b56447978c43def87fa57:
+               goto end73f21632e56c3614902d3c29c82dc4ea
+       end73f21632e56c3614902d3c29c82dc4ea:
                ;
-               // match: (MOVQload [off1] (SPAddr [off2]) mem)
-               // cond:
-               // result: (MOVQloadSP [off1.(int64)+off2.(int64)] mem)
+               // match: (Load <t> ptr mem)
+               // cond: (is64BitInt(t) || isPtr(t))
+               // result: (MOVQload [int64(0)] ptr mem)
                {
-                       off1 := v.Aux
-                       if v.Args[0].Op != OpSPAddr {
-                               goto end3d8628a6536350a123be81240b8a1376
-                       }
-                       off2 := v.Args[0].Aux
+                       t := v.Type
+                       ptr := v.Args[0]
                        mem := v.Args[1]
-                       v.Op = OpMOVQloadSP
-                       v.Aux = nil
-                       v.resetArgs()
-                       v.Aux = off1.(int64) + off2.(int64)
-                       v.AddArg(mem)
-                       return true
-               }
-               goto end3d8628a6536350a123be81240b8a1376
-       end3d8628a6536350a123be81240b8a1376:
-               ;
-               // match: (MOVQload [off] (Global [sym]) mem)
-               // cond:
-               // result: (MOVQloadglobal [GlobalOffset{sym,off.(int64)}] mem)
-               {
-                       off := v.Aux
-                       if v.Args[0].Op != OpGlobal {
-                               goto end20693899317f3f8d1b47fefa64087654
+                       if !(is64BitInt(t) || isPtr(t)) {
+                               goto end581ce5a20901df1b8143448ba031685b
                        }
-                       sym := v.Args[0].Aux
-                       mem := v.Args[1]
-                       v.Op = OpMOVQloadglobal
+                       v.Op = OpMOVQload
                        v.Aux = nil
                        v.resetArgs()
-                       v.Aux = GlobalOffset{sym, off.(int64)}
+                       v.Aux = int64(0)
+                       v.AddArg(ptr)
                        v.AddArg(mem)
                        return true
                }
-               goto end20693899317f3f8d1b47fefa64087654
-       end20693899317f3f8d1b47fefa64087654:
+               goto end581ce5a20901df1b8143448ba031685b
+       end581ce5a20901df1b8143448ba031685b:
                ;
+       case OpMOVQload:
                // match: (MOVQload [off1] (ADDCQ [off2] ptr) mem)
                // cond:
-               // result: (MOVQload [off1.(int64)+off2.(int64)] ptr mem)
+               // result: (MOVQload [addOff(off1, off2)] ptr mem)
                {
                        off1 := v.Aux
                        if v.Args[0].Op != OpADDCQ {
-                               goto enda68a39292ba2a05b3436191cb0bb0516
+                               goto end218ceec16b8299d573d3c9ccaa69b086
                        }
                        off2 := v.Args[0].Aux
                        ptr := v.Args[0].Args[0]
@@ -368,21 +345,21 @@ func lowerAmd64(v *Value) bool {
                        v.Op = OpMOVQload
                        v.Aux = nil
                        v.resetArgs()
-                       v.Aux = off1.(int64) + off2.(int64)
+                       v.Aux = addOff(off1, off2)
                        v.AddArg(ptr)
                        v.AddArg(mem)
                        return true
                }
-               goto enda68a39292ba2a05b3436191cb0bb0516
-       enda68a39292ba2a05b3436191cb0bb0516:
+               goto end218ceec16b8299d573d3c9ccaa69b086
+       end218ceec16b8299d573d3c9ccaa69b086:
                ;
                // match: (MOVQload [off1] (LEAQ8 [off2] ptr idx) mem)
                // cond:
-               // result: (MOVQloadidx8 [off1.(int64)+off2.(int64)] ptr idx mem)
+               // result: (MOVQloadidx8 [addOff(off1, off2)] ptr idx mem)
                {
                        off1 := v.Aux
                        if v.Args[0].Op != OpLEAQ8 {
-                               goto endba0e5cee85021614041016b1a2709ab8
+                               goto end02f5ad148292c46463e7c20d3b821735
                        }
                        off2 := v.Args[0].Aux
                        ptr := v.Args[0].Args[0]
@@ -391,131 +368,117 @@ func lowerAmd64(v *Value) bool {
                        v.Op = OpMOVQloadidx8
                        v.Aux = nil
                        v.resetArgs()
-                       v.Aux = off1.(int64) + off2.(int64)
+                       v.Aux = addOff(off1, off2)
                        v.AddArg(ptr)
                        v.AddArg(idx)
                        v.AddArg(mem)
                        return true
                }
-               goto endba0e5cee85021614041016b1a2709ab8
-       endba0e5cee85021614041016b1a2709ab8:
+               goto end02f5ad148292c46463e7c20d3b821735
+       end02f5ad148292c46463e7c20d3b821735:
                ;
-       case OpMOVQstore:
-               // match: (MOVQstore [off1] (FPAddr [off2]) val mem)
+       case OpMOVQloadidx8:
+               // match: (MOVQloadidx8 [off1] (ADDCQ [off2] ptr) idx mem)
                // cond:
-               // result: (MOVQstoreFP [off1.(int64)+off2.(int64)] val mem)
+               // result: (MOVQloadidx8 [addOff(off1, off2)] ptr idx mem)
                {
                        off1 := v.Aux
-                       if v.Args[0].Op != OpFPAddr {
-                               goto end0a2a81a20558dfc93790aecb1e9cc81a
+                       if v.Args[0].Op != OpADDCQ {
+                               goto ende47e8d742e2615f39fb6509a5749e414
                        }
                        off2 := v.Args[0].Aux
-                       val := v.Args[1]
+                       ptr := v.Args[0].Args[0]
+                       idx := v.Args[1]
                        mem := v.Args[2]
-                       v.Op = OpMOVQstoreFP
+                       v.Op = OpMOVQloadidx8
                        v.Aux = nil
                        v.resetArgs()
-                       v.Aux = off1.(int64) + off2.(int64)
-                       v.AddArg(val)
+                       v.Aux = addOff(off1, off2)
+                       v.AddArg(ptr)
+                       v.AddArg(idx)
                        v.AddArg(mem)
                        return true
                }
-               goto end0a2a81a20558dfc93790aecb1e9cc81a
-       end0a2a81a20558dfc93790aecb1e9cc81a:
+               goto ende47e8d742e2615f39fb6509a5749e414
+       ende47e8d742e2615f39fb6509a5749e414:
                ;
-               // match: (MOVQstore [off1] (SPAddr [off2]) val mem)
+       case OpMOVQstore:
+               // match: (MOVQstore [off1] (ADDCQ [off2] ptr) val mem)
                // cond:
-               // result: (MOVQstoreSP [off1.(int64)+off2.(int64)] val mem)
+               // result: (MOVQstore [addOff(off1, off2)] ptr val mem)
                {
                        off1 := v.Aux
-                       if v.Args[0].Op != OpSPAddr {
-                               goto end1cb5b7e766f018270fa434c6f46f607f
+                       if v.Args[0].Op != OpADDCQ {
+                               goto enddfd4c7a20fd3b84eb9dcf84b98c661fc
                        }
                        off2 := v.Args[0].Aux
+                       ptr := v.Args[0].Args[0]
                        val := v.Args[1]
                        mem := v.Args[2]
-                       v.Op = OpMOVQstoreSP
-                       v.Aux = nil
-                       v.resetArgs()
-                       v.Aux = off1.(int64) + off2.(int64)
-                       v.AddArg(val)
-                       v.AddArg(mem)
-                       return true
-               }
-               goto end1cb5b7e766f018270fa434c6f46f607f
-       end1cb5b7e766f018270fa434c6f46f607f:
-               ;
-               // match: (MOVQstore [off] (Global [sym]) val mem)
-               // cond:
-               // result: (MOVQstoreglobal [GlobalOffset{sym,off.(int64)}] val mem)
-               {
-                       off := v.Aux
-                       if v.Args[0].Op != OpGlobal {
-                               goto end657d07e37c720a8fbb108a31bb48090d
-                       }
-                       sym := v.Args[0].Aux
-                       val := v.Args[1]
-                       mem := v.Args[2]
-                       v.Op = OpMOVQstoreglobal
+                       v.Op = OpMOVQstore
                        v.Aux = nil
                        v.resetArgs()
-                       v.Aux = GlobalOffset{sym, off.(int64)}
+                       v.Aux = addOff(off1, off2)
+                       v.AddArg(ptr)
                        v.AddArg(val)
                        v.AddArg(mem)
                        return true
                }
-               goto end657d07e37c720a8fbb108a31bb48090d
-       end657d07e37c720a8fbb108a31bb48090d:
+               goto enddfd4c7a20fd3b84eb9dcf84b98c661fc
+       enddfd4c7a20fd3b84eb9dcf84b98c661fc:
                ;
-               // match: (MOVQstore [off1] (ADDCQ [off2] ptr) val mem)
+               // match: (MOVQstore [off1] (LEAQ8 [off2] ptr idx) val mem)
                // cond:
-               // result: (MOVQstore [off1.(int64)+off2.(int64)] ptr val mem)
+               // result: (MOVQstoreidx8 [addOff(off1, off2)] ptr idx val mem)
                {
                        off1 := v.Aux
-                       if v.Args[0].Op != OpADDCQ {
-                               goto end271e3052de832e22b1f07576af2854de
+                       if v.Args[0].Op != OpLEAQ8 {
+                               goto endce1db8c8d37c8397c500a2068a65c215
                        }
                        off2 := v.Args[0].Aux
                        ptr := v.Args[0].Args[0]
+                       idx := v.Args[0].Args[1]
                        val := v.Args[1]
                        mem := v.Args[2]
-                       v.Op = OpMOVQstore
+                       v.Op = OpMOVQstoreidx8
                        v.Aux = nil
                        v.resetArgs()
-                       v.Aux = off1.(int64) + off2.(int64)
+                       v.Aux = addOff(off1, off2)
                        v.AddArg(ptr)
+                       v.AddArg(idx)
                        v.AddArg(val)
                        v.AddArg(mem)
                        return true
                }
-               goto end271e3052de832e22b1f07576af2854de
-       end271e3052de832e22b1f07576af2854de:
+               goto endce1db8c8d37c8397c500a2068a65c215
+       endce1db8c8d37c8397c500a2068a65c215:
                ;
-               // match: (MOVQstore [off1] (LEAQ8 [off2] ptr idx) val mem)
+       case OpMOVQstoreidx8:
+               // match: (MOVQstoreidx8 [off1] (ADDCQ [off2] ptr) idx val mem)
                // cond:
-               // result: (MOVQstoreidx8 [off1.(int64)+off2.(int64)] ptr idx val mem)
+               // result: (MOVQstoreidx8 [addOff(off1, off2)] ptr idx val mem)
                {
                        off1 := v.Aux
-                       if v.Args[0].Op != OpLEAQ8 {
-                               goto end4ad469f534c7369f6ac36bdace3462ad
+                       if v.Args[0].Op != OpADDCQ {
+                               goto endcdb222707a568ad468f7fff2fc42fc39
                        }
                        off2 := v.Args[0].Aux
                        ptr := v.Args[0].Args[0]
-                       idx := v.Args[0].Args[1]
-                       val := v.Args[1]
-                       mem := v.Args[2]
+                       idx := v.Args[1]
+                       val := v.Args[2]
+                       mem := v.Args[3]
                        v.Op = OpMOVQstoreidx8
                        v.Aux = nil
                        v.resetArgs()
-                       v.Aux = off1.(int64) + off2.(int64)
+                       v.Aux = addOff(off1, off2)
                        v.AddArg(ptr)
                        v.AddArg(idx)
                        v.AddArg(val)
                        v.AddArg(mem)
                        return true
                }
-               goto end4ad469f534c7369f6ac36bdace3462ad
-       end4ad469f534c7369f6ac36bdace3462ad:
+               goto endcdb222707a568ad468f7fff2fc42fc39
+       endcdb222707a568ad468f7fff2fc42fc39:
                ;
        case OpMULCQ:
                // match: (MULCQ [c] x)
@@ -538,13 +501,13 @@ func lowerAmd64(v *Value) bool {
        end90a1c055d9658aecacce5e101c1848b4:
                ;
        case OpMULQ:
-               // match: (MULQ x (Const [c]))
+               // match: (MULQ x (MOVQconst [c]))
                // cond:
                // result: (MULCQ [c] x)
                {
                        x := v.Args[0]
-                       if v.Args[1].Op != OpConst {
-                               goto endc427f4838d2e83c00cc097b20bd20a37
+                       if v.Args[1].Op != OpMOVQconst {
+                               goto endce35d001482ea209e62e9394bd07c7cb
                        }
                        c := v.Args[1].Aux
                        v.Op = OpMULCQ
@@ -554,15 +517,15 @@ func lowerAmd64(v *Value) bool {
                        v.AddArg(x)
                        return true
                }
-               goto endc427f4838d2e83c00cc097b20bd20a37
-       endc427f4838d2e83c00cc097b20bd20a37:
+               goto endce35d001482ea209e62e9394bd07c7cb
+       endce35d001482ea209e62e9394bd07c7cb:
                ;
-               // match: (MULQ (Const [c]) x)
+               // match: (MULQ (MOVQconst [c]) x)
                // cond:
                // result: (MULCQ [c] x)
                {
-                       if v.Args[0].Op != OpConst {
-                               goto endd70de938e71150d1c9e8173c2a5b2d95
+                       if v.Args[0].Op != OpMOVQconst {
+                               goto end804f58b1f6a7cce19d48379999ec03f1
                        }
                        c := v.Args[0].Aux
                        x := v.Args[1]
@@ -573,8 +536,32 @@ func lowerAmd64(v *Value) bool {
                        v.AddArg(x)
                        return true
                }
-               goto endd70de938e71150d1c9e8173c2a5b2d95
-       endd70de938e71150d1c9e8173c2a5b2d95:
+               goto end804f58b1f6a7cce19d48379999ec03f1
+       end804f58b1f6a7cce19d48379999ec03f1:
+               ;
+       case OpMove:
+               // match: (Move [size] dst src mem)
+               // cond:
+               // result: (REPMOVSB dst src (Const <TypeUInt64> [size.(int64)]) mem)
+               {
+                       size := v.Aux
+                       dst := v.Args[0]
+                       src := v.Args[1]
+                       mem := v.Args[2]
+                       v.Op = OpREPMOVSB
+                       v.Aux = nil
+                       v.resetArgs()
+                       v.AddArg(dst)
+                       v.AddArg(src)
+                       v0 := v.Block.NewValue(OpConst, TypeInvalid, nil)
+                       v0.Type = TypeUInt64
+                       v0.Aux = size.(int64)
+                       v.AddArg(v0)
+                       v.AddArg(mem)
+                       return true
+               }
+               goto end48909259b265a6bb2a076bc2c2dc7d1f
+       end48909259b265a6bb2a076bc2c2dc7d1f:
                ;
        case OpMul:
                // match: (Mul <t> x y)
@@ -597,6 +584,23 @@ func lowerAmd64(v *Value) bool {
                goto endfab0d598f376ecba45a22587d50f7aff
        endfab0d598f376ecba45a22587d50f7aff:
                ;
+       case OpOffPtr:
+               // match: (OffPtr [off] ptr)
+               // cond:
+               // result: (ADDCQ [off] ptr)
+               {
+                       off := v.Aux
+                       ptr := v.Args[0]
+                       v.Op = OpADDCQ
+                       v.Aux = nil
+                       v.resetArgs()
+                       v.Aux = off
+                       v.AddArg(ptr)
+                       return true
+               }
+               goto endfe8f713b1d237a23311fb721ee46bedb
+       endfe8f713b1d237a23311fb721ee46bedb:
+               ;
        case OpSETL:
                // match: (SETL (InvertFlags x))
                // cond:
@@ -616,13 +620,13 @@ func lowerAmd64(v *Value) bool {
        end456c7681d48305698c1ef462d244bdc6:
                ;
        case OpSUBQ:
-               // match: (SUBQ x (Const [c]))
+               // match: (SUBQ x (MOVQconst [c]))
                // cond:
                // result: (SUBCQ x [c])
                {
                        x := v.Args[0]
-                       if v.Args[1].Op != OpConst {
-                               goto endb31e242f283867de4722665a5796008c
+                       if v.Args[1].Op != OpMOVQconst {
+                               goto endc96cd1cb2dd98427c34fb9543feca4fe
                        }
                        c := v.Args[1].Aux
                        v.Op = OpSUBCQ
@@ -632,16 +636,16 @@ func lowerAmd64(v *Value) bool {
                        v.Aux = c
                        return true
                }
-               goto endb31e242f283867de4722665a5796008c
-       endb31e242f283867de4722665a5796008c:
+               goto endc96cd1cb2dd98427c34fb9543feca4fe
+       endc96cd1cb2dd98427c34fb9543feca4fe:
                ;
-               // match: (SUBQ <t> (Const [c]) x)
+               // match: (SUBQ <t> (MOVQconst [c]) x)
                // cond:
                // result: (NEGQ (SUBCQ <t> x [c]))
                {
                        t := v.Type
-                       if v.Args[0].Op != OpConst {
-                               goto end569cc755877d1f89a701378bec05c08d
+                       if v.Args[0].Op != OpMOVQconst {
+                               goto end900aaaf28cefac6bb62e76b5151611cf
                        }
                        c := v.Args[0].Aux
                        x := v.Args[1]
@@ -655,8 +659,8 @@ func lowerAmd64(v *Value) bool {
                        v.AddArg(v0)
                        return true
                }
-               goto end569cc755877d1f89a701378bec05c08d
-       end569cc755877d1f89a701378bec05c08d:
+               goto end900aaaf28cefac6bb62e76b5151611cf
+       end900aaaf28cefac6bb62e76b5151611cf:
                ;
        case OpStore:
                // match: (Store ptr val mem)
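
Note on the generated matcher above: rulegen compiles each rule into one
match-and-mutate block. The block binds operands, jumps to a per-rule
failure label on any mismatch, and otherwise rewrites v in place and
returns true. The label names are hashes of the rule text, which is why
they all change whenever a rule is edited. A minimal sketch of the
skeleton, with hypothetical names:

        case OpFoo:
                // match: (Foo x)
                // cond:
                // result: (Bar x)
                {
                        x := v.Args[0] // operand checks goto endHASH on mismatch
                        v.Op = OpBar   // rewrite v in place
                        v.Aux = nil
                        v.resetArgs()
                        v.AddArg(x)
                        return true // signal applyRewrite to run another pass
                }
                goto endHASH
        endHASH:
                ;
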
index ebe4a8e7471ef54d3c6c65cc8a9eec0feefe10d0..e0dc531fc9bd9691ac4e73738cce90b9445509d6 100644 (file)
@@ -4,6 +4,8 @@
 
 package ssa
 
+import "fmt"
+
 // An Op encodes the specific operation that a Value performs.
 // Opcodes' semantics can be modified by the type and aux fields of the Value.
 // For instance, OpAdd can be 32 or 64 bit, signed or unsigned, float or complex, depending on Value.Type.
@@ -47,8 +49,11 @@ const (
        OpArg    // address of a function parameter/result.  Memory input is an arg called ".mem".  aux is a string (TODO: make it something other than a string?)
        OpGlobal // the address of a global variable aux.(*gc.Sym)
        OpFunc   // entry address of a function
+       OpFP     // frame pointer
+       OpSP     // stack pointer
 
        OpCopy // output = arg0
+       OpMove // arg0=destptr, arg1=srcptr, arg2=mem, aux.(int64)=size.  Returns memory.
        OpPhi  // select an argument based on which predecessor block we came from
 
        OpSliceMake // arg0=ptr, arg1=len, arg2=cap
@@ -62,7 +67,8 @@ const (
 
        OpLoad       // Load from arg0+aux.(int64).  arg1=memory
        OpStore      // Store arg1 to arg0+aux.(int64).  arg2=memory.  Returns memory.
-       OpSliceIndex // arg0=slice, arg1=index, arg2=memory
+       OpArrayIndex // arg0=array, arg1=index.  Returns a[i]
+       OpPtrIndex   // arg0=ptr, arg1=index. Computes ptr+sizeof(*v.type)*index, where index is extended to ptrwidth type
        OpIsNonNil   // arg0 != nil
        OpIsInBounds // 0 <= arg0 < arg1
 
@@ -75,6 +81,8 @@ const (
        OpConvert // convert arg0 to another type
        OpConvNop // interpret arg0 as another type
 
+       OpOffPtr // arg0 + aux.(int64) (arg0 and result are pointers)
+
        // These ops return a pointer to a location on the stack.
        OpFPAddr // FP + aux.(int64) (+ == args from caller, - == locals)
        OpSPAddr // SP + aux.(int64)
@@ -96,6 +104,15 @@ type GlobalOffset struct {
        Offset int64
 }
 
+// offset adds x to g's offset and returns the resulting location.
+func (g GlobalOffset) offset(x int64) GlobalOffset {
+       return GlobalOffset{g.Global, g.Offset + x}
+}
+
+func (g GlobalOffset) String() string {
+       return fmt.Sprintf("%v+%d", g.Global, g.Offset)
+}
+
 //go:generate stringer -type=Op
 
 type opInfo struct {
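
The GlobalOffset helpers above are plain value methods; a hypothetical
use, assuming sym holds some *gc.Sym:

        g := GlobalOffset{Global: sym, Offset: 8}
        g = g.offset(16) // GlobalOffset{sym, 24}
        _ = g.String()   // something like "sym+24", via fmt.Sprintf("%v+%d", ...)
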
index 0851cfe0fb387e115920c3d82549f7fa772d7d47..9b22f664efb19fd696c3c436b563bcb2d0dc27a1 100644 (file)
@@ -6,16 +6,16 @@ import "fmt"
 
 const (
        _Op_name_0 = "opInvalid"
-       _Op_name_1 = "opGenericBaseOpAddOpSubOpMulOpLessOpConstOpArgOpGlobalOpFuncOpCopyOpPhiOpSliceMakeOpSlicePtrOpSliceLenOpSliceCapOpStringMakeOpStringPtrOpStringLenOpLoadOpStoreOpSliceIndexOpIsNonNilOpIsInBoundsOpCallOpStaticCallOpConvertOpConvNopOpFPAddrOpSPAddrOpStoreReg8OpLoadReg8OpFwdRef"
-       _Op_name_2 = "opAMD64BaseOpADDQOpSUBQOpADDCQOpSUBCQOpMULQOpMULCQOpSHLQOpSHLCQOpNEGQOpADDLOpCMPQOpCMPCQOpTESTQOpSETEQOpSETNEOpSETLOpSETGEOpSETBOpInvertFlagsOpLEAQOpLEAQ2OpLEAQ4OpLEAQ8OpMOVQloadOpMOVQstoreOpMOVQloadidx8OpMOVQstoreidx8OpMOVQloadglobalOpMOVQstoreglobalOpMOVQloadFPOpMOVQloadSPOpMOVQstoreFPOpMOVQstoreSPOpMOVQconst"
+       _Op_name_1 = "opGenericBaseOpAddOpSubOpMulOpLessOpConstOpArgOpGlobalOpFuncOpFPOpSPOpCopyOpMoveOpPhiOpSliceMakeOpSlicePtrOpSliceLenOpSliceCapOpStringMakeOpStringPtrOpStringLenOpLoadOpStoreOpArrayIndexOpPtrIndexOpIsNonNilOpIsInBoundsOpCallOpStaticCallOpConvertOpConvNopOpOffPtrOpFPAddrOpSPAddrOpStoreReg8OpLoadReg8OpFwdRef"
+       _Op_name_2 = "opAMD64BaseOpADDQOpSUBQOpADDCQOpSUBCQOpMULQOpMULCQOpSHLQOpSHLCQOpNEGQOpADDLOpCMPQOpCMPCQOpTESTQOpTESTBOpSETEQOpSETNEOpSETLOpSETGEOpSETBOpInvertFlagsOpLEAQOpLEAQ2OpLEAQ4OpLEAQ8OpLEAQglobalOpMOVBloadOpMOVBQZXloadOpMOVBQSXloadOpMOVQloadOpMOVQstoreOpMOVQloadidx8OpMOVQstoreidx8OpMOVQloadglobalOpMOVQstoreglobalOpMOVQconstOpREPMOVSB"
        _Op_name_3 = "op386Base"
        _Op_name_4 = "opMax"
 )
 
 var (
        _Op_index_0 = [...]uint8{0, 9}
-       _Op_index_1 = [...]uint16{0, 13, 18, 23, 28, 34, 41, 46, 54, 60, 66, 71, 82, 92, 102, 112, 124, 135, 146, 152, 159, 171, 181, 193, 199, 211, 220, 229, 237, 245, 256, 266, 274}
-       _Op_index_2 = [...]uint16{0, 11, 17, 23, 30, 37, 43, 50, 56, 63, 69, 75, 81, 88, 95, 102, 109, 115, 122, 128, 141, 147, 154, 161, 168, 178, 189, 203, 218, 234, 251, 263, 275, 288, 301, 312}
+       _Op_index_1 = [...]uint16{0, 13, 18, 23, 28, 34, 41, 46, 54, 60, 64, 68, 74, 80, 85, 96, 106, 116, 126, 138, 149, 160, 166, 173, 185, 195, 205, 217, 223, 235, 244, 253, 261, 269, 277, 288, 298, 306}
+       _Op_index_2 = [...]uint16{0, 11, 17, 23, 30, 37, 43, 50, 56, 63, 69, 75, 81, 88, 95, 102, 109, 116, 122, 129, 135, 148, 154, 161, 168, 175, 187, 197, 210, 223, 233, 244, 258, 273, 289, 306, 317, 327}
        _Op_index_3 = [...]uint8{0, 9}
        _Op_index_4 = [...]uint8{0, 5}
 )
@@ -24,10 +24,10 @@ func (i Op) String() string {
        switch {
        case i == 0:
                return _Op_name_0
-       case 1001 <= i && i <= 1032:
+       case 1001 <= i && i <= 1037:
                i -= 1001
                return _Op_name_1[_Op_index_1[i]:_Op_index_1[i+1]]
-       case 2001 <= i && i <= 2035:
+       case 2001 <= i && i <= 2037:
                i -= 2001
                return _Op_name_2[_Op_index_2[i]:_Op_index_2[i+1]]
        case i == 3001:
index 8bdd19f7130e96131e75fe337081f6c5badcbe0d..46f0a69dfb25d583a8962faefb83a8ae739f2b4e 100644 (file)
@@ -30,6 +30,7 @@ const (
        OpCMPQ  // arg0 compare to arg1
        OpCMPCQ // arg0 compare to aux.(int64)
        OpTESTQ // (arg0 & arg1) compare to 0
+       OpTESTB // (arg0 & arg1) compare to 0
 
        // These opcodes extract a particular boolean condition from a flags value.
        OpSETEQ // extract == condition from arg0
@@ -43,29 +44,30 @@ const (
        // This is a pseudo-op which can't appear in assembly output.
        OpInvertFlags // reverse direction of arg0
 
-       OpLEAQ  // arg0 + arg1 + aux.(int64)
-       OpLEAQ2 // arg0 + 2*arg1 + aux.(int64)
-       OpLEAQ4 // arg0 + 4*arg1 + aux.(int64)
-       OpLEAQ8 // arg0 + 8*arg1 + aux.(int64)
+       OpLEAQ       // arg0 + arg1 + aux.(int64)
+       OpLEAQ2      // arg0 + 2*arg1 + aux.(int64)
+       OpLEAQ4      // arg0 + 4*arg1 + aux.(int64)
+       OpLEAQ8      // arg0 + 8*arg1 + aux.(int64)
+       OpLEAQglobal // no args.  address of aux.(GlobalOffset)
 
        // Load/store from general address
-       OpMOVQload      // Load from arg0+aux.(int64).  arg1=memory
+       OpMOVBload // Load from arg0+aux.(int64).  arg1=memory
+       OpMOVBQZXload // Load a byte from arg0+aux.(int64), zero-extending it to 64 bits.  arg1=memory
+       OpMOVBQSXload // Load a byte from arg0+aux.(int64), sign-extending it to 64 bits.  arg1=memory
+       OpMOVQload    // Load 8 bytes from arg0+aux.(int64).  arg1=memory
        OpMOVQstore     // Store arg1 to arg0+aux.(int64).  arg2=memory, returns memory.
        OpMOVQloadidx8  // Load from arg0+arg1*8+aux.(int64).  arg2=memory
        OpMOVQstoreidx8 // Store arg2 to arg0+arg1*8+aux.(int64).  arg3=memory, returns memory.
 
-       // Load/store from global.  aux.(GlobalOffset) encodes the global location.
+       // Load/store from global.  Same as the above loads, but arg0 is missing and aux is a GlobalOffset instead of an int64.
        OpMOVQloadglobal  // arg0 = memory
        OpMOVQstoreglobal // store arg0.  arg1=memory, returns memory.
 
-       // Load/store from stack slot.
-       OpMOVQloadFP  // load from FP+aux.(int64).  arg0=memory
-       OpMOVQloadSP  // load from SP+aux.(int64).  arg0=memory
-       OpMOVQstoreFP // store arg0 to FP+aux.(int64).  arg1=memory, returns memory.
-       OpMOVQstoreSP // store arg0 to SP+aux.(int64).  arg1=memory, returns memory.
-
        // materialize a constant into a register
        OpMOVQconst // (takes no arguments)
+
+       // move memory
+       OpREPMOVSB // arg0=destptr, arg1=srcptr, arg2=len, arg3=mem
 )
 
 type regMask uint64
@@ -89,13 +91,16 @@ var regsAMD64 = [...]string{
        "R15",
 
        // pseudo registers
+       "FP",
        "FLAGS",
        "OVERWRITE0", // the same register as the first input
 }
 
-var gp regMask = 0xef // all integer registers except SP
-var cx regMask = 0x2
-var flags regMask = 1 << 16
+var gp regMask = 0x1ffff // all integer registers (including SP&FP)
+var cx regMask = 1 << 1
+var si regMask = 1 << 6
+var di regMask = 1 << 7
+var flags regMask = 1 << 17
 
 var (
        // gp = general purpose (integer) registers
@@ -129,13 +134,16 @@ var amd64Table = map[Op]opInfo{
        OpCMPQ:  {asm: "CMPQ\t%I0,%I1", reg: gp2_flags}, // compute arg[0]-arg[1] and produce flags
        OpCMPCQ: {asm: "CMPQ\t$%A,%I0", reg: gp1_flags},
        OpTESTQ: {asm: "TESTQ\t%I0,%I1", reg: gp2_flags},
+       OpTESTB: {asm: "TESTB\t%I0,%I1", reg: gp2_flags},
 
-       OpLEAQ:  {flags: OpFlagCommutative, asm: "LEAQ\t%A(%I0)(%I1*1),%O0", reg: gp21}, // aux = int64 constant to add
-       OpLEAQ2: {asm: "LEAQ\t%A(%I0)(%I1*2),%O0"},
-       OpLEAQ4: {asm: "LEAQ\t%A(%I0)(%I1*4),%O0"},
-       OpLEAQ8: {asm: "LEAQ\t%A(%I0)(%I1*8),%O0"},
+       OpLEAQ:       {flags: OpFlagCommutative, asm: "LEAQ\t%A(%I0)(%I1*1),%O0", reg: gp21}, // aux = int64 constant to add
+       OpLEAQ2:      {asm: "LEAQ\t%A(%I0)(%I1*2),%O0"},
+       OpLEAQ4:      {asm: "LEAQ\t%A(%I0)(%I1*4),%O0"},
+       OpLEAQ8:      {asm: "LEAQ\t%A(%I0)(%I1*8),%O0"},
+       OpLEAQglobal: {asm: "LEAQ\t%A(SB),%O0", reg: gp01},
 
        // loads and stores
+       OpMOVBload:      {asm: "MOVB\t%A(%I0),%O0", reg: gpload},
        OpMOVQload:      {asm: "MOVQ\t%A(%I0),%O0", reg: gpload},
        OpMOVQstore:     {asm: "MOVQ\t%I1,%A(%I0)", reg: gpstore},
        OpMOVQloadidx8:  {asm: "MOVQ\t%A(%I0)(%I1*8),%O0", reg: gploadidx},
@@ -145,23 +153,20 @@ var amd64Table = map[Op]opInfo{
 
        OpStaticCall: {asm: "CALL\t%A(SB)"},
 
-       OpCopy: {asm: "MOVQ\t%I0,%O0", reg: gp11},
+       OpCopy:    {asm: "MOVQ\t%I0,%O0", reg: gp11}, // TODO: make arch-specific
+       OpConvNop: {asm: "MOVQ\t%I0,%O0", reg: gp11}, // TODO: make arch-specific.  Or get rid of this altogether.
 
        // convert from flags back to boolean
        OpSETL: {},
 
-       // ops for load/store to stack
-       OpMOVQloadFP:  {asm: "MOVQ\t%A(FP),%O0", reg: gpload_stack},  // mem -> value
-       OpMOVQloadSP:  {asm: "MOVQ\t%A(SP),%O0", reg: gpload_stack},  // mem -> value
-       OpMOVQstoreFP: {asm: "MOVQ\t%I0,%A(FP)", reg: gpstore_stack}, // mem, value -> mem
-       OpMOVQstoreSP: {asm: "MOVQ\t%I0,%A(SP)", reg: gpstore_stack}, // mem, value -> mem
-
        // ops for spilling of registers
        // unlike regular loads & stores, these take no memory argument.
        // They are just like OpCopy but we use them during register allocation.
        // TODO: different widths, float
        OpLoadReg8:  {asm: "MOVQ\t%I0,%O0"},
        OpStoreReg8: {asm: "MOVQ\t%I0,%O0"},
+
+       OpREPMOVSB: {asm: "REP MOVSB", reg: [2][]regMask{{di, si, cx, 0}, {0}}}, // TODO: record that si/di/cx are clobbered
 }
 
 func init() {
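
The asm fields in the table are templates: %A expands to the aux value,
%In to the register assigned to input n, and %On to the register assigned
to output n (a reading inferred from the strings themselves; the expansion
code is not part of this diff). For example, OpMOVQloadidx8 with aux=16
and hypothetical register assignments renders as:

        // template: "MOVQ\t%A(%I0)(%I1*8),%O0"
        // aux=16, inputs in AX and BX, output in CX:
        MOVQ    16(AX)(BX*8),CX
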
index e2de10896efad121197183a2edf307f0ec495f87..c798d2e936591e0c5a59d55b125fd172ef2a3ba3 100644 (file)
@@ -39,8 +39,9 @@ var registers = [...]Register{
 
        // TODO X0, ...
        // TODO: make arch-dependent
-       Register{16, "FLAGS"},
-       Register{17, "OVERWRITE"},
+       Register{16, "FP"}, // pseudo-register, actually a constant offset from SP
+       Register{17, "FLAGS"},
+       Register{18, "OVERWRITE"},
 }
 
 // countRegs returns the number of set bits in the register mask.
@@ -84,6 +85,19 @@ func regalloc(f *Func) {
 
        var oldSched []*Value
 
+       // Hack to find fp, sp Values and assign them a register. (TODO: make not so hacky)
+       var fp, sp *Value
+       for _, v := range f.Entry.Values {
+               switch v.Op {
+               case OpSP:
+                       sp = v
+                       home = setloc(home, v, &registers[4]) // TODO: arch-dependent
+               case OpFP:
+                       fp = v
+                       home = setloc(home, v, &registers[16]) // TODO: arch-dependent
+               }
+       }
+
        // Register allocate each block separately.  All live values will live
        // in home locations (stack slots) between blocks.
        for _, b := range f.Blocks {
@@ -115,6 +129,10 @@ func regalloc(f *Func) {
                }
                regs := make([]regInfo, numRegs)
 
+               // TODO: hack: initialize fixed registers
+               regs[4] = regInfo{sp, sp, false}
+               regs[16] = regInfo{fp, fp, false}
+
                var used regMask  // has a 1 for each non-nil entry in regs
                var dirty regMask // has a 1 for each dirty entry in regs
 
@@ -133,9 +151,6 @@ func regalloc(f *Func) {
                        //   - definition of v.  c will be identical to v but will live in
                        //     a register.  v will be modified into a spill of c.
                        regspec := opcodeTable[v.Op].reg
-                       if v.Op == OpConvNop {
-                               regspec = opcodeTable[v.Args[0].Op].reg
-                       }
                        inputs := regspec[0]
                        outputs := regspec[1]
                        if len(inputs) == 0 && len(outputs) == 0 {
@@ -154,6 +169,7 @@ func regalloc(f *Func) {
                        // nospill contains registers that we can't spill because
                        // we already set them up for use by the current instruction.
                        var nospill regMask
+                       nospill |= 0x10010 // SP and FP can't be spilled (TODO: arch-specific)
 
                        // Move inputs into registers
                        for _, o := range order {
@@ -215,10 +231,16 @@ func regalloc(f *Func) {
 
                                        // Load w into this register
                                        var c *Value
-                                       if w.Op == OpConst {
+                                       if len(w.Args) == 0 {
                                                // Materialize w
-                                               // TODO: arch-specific MOV op
-                                               c = b.NewValue(OpMOVQconst, w.Type, w.Aux)
+                                               if w.Op == OpFP || w.Op == OpSP || w.Op == OpGlobal {
+                                                       c = b.NewValue1(OpCopy, w.Type, nil, w)
+                                               } else {
+                                                       c = b.NewValue(w.Op, w.Type, w.Aux)
+                                               }
+                                       } else if len(w.Args) == 1 && (w.Args[0].Op == OpFP || w.Args[0].Op == OpSP || w.Args[0].Op == OpGlobal) {
+                                               // Materialize offsets from SP/FP/Global
+                                               c = b.NewValue1(w.Op, w.Type, w.Aux, w.Args[0])
                                        } else if wreg != 0 {
                                                // Copy from another register.
                                                // Typically just an optimization, but this is
@@ -317,6 +339,10 @@ func regalloc(f *Func) {
                        v := regs[r].v
                        c := regs[r].c
                        if lastUse[v.ID] <= len(oldSched) {
+                               if v == v.Block.Control {
+                                       // link control value to register version
+                                       v.Block.Control = c
+                               }
                                continue // not live after block
                        }
 
@@ -334,6 +360,7 @@ func regalloc(f *Func) {
                }
        }
        f.RegAlloc = home
+       deadcode(f) // remove values that had all of their uses rematerialized.  TODO: separate pass?
 }
 
 // addPhiCopies adds copies of phi inputs in the blocks
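
The two materialization branches above mean that zero-argument values, and
one-argument values computed directly off FP, SP, or a global, are
recomputed at each use instead of being spilled and reloaded. A
hypothetical helper summarizing the policy (not a function in this
commit):

        // canRemat reports whether w is cheaper to recompute at each use
        // than to spill and reload; a restatement of the checks above.
        func canRemat(w *Value) bool {
                if len(w.Args) == 0 {
                        return true // constants, FP, SP, Global
                }
                if len(w.Args) == 1 {
                        switch w.Args[0].Op {
                        case OpFP, OpSP, OpGlobal:
                                return true // e.g. ADDCQ [off] FP
                        }
                }
                return false
        }

stackalloc.go below applies the same two tests when deciding which values
need stack slots.
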
index 855719a8771eea14d183bb7176c68abdaf8d39a7..75e910d690898c8a2f98dd725e2e557f59770a3f 100644 (file)
@@ -4,14 +4,14 @@
 
 package ssa
 
-import "fmt"
+import "log"
 
 func applyRewrite(f *Func, r func(*Value) bool) {
        // repeat rewrites until we find no more rewrites
        var curv *Value
        defer func() {
                if curv != nil {
-                       fmt.Printf("panic during rewrite of %s\n", curv.LongString())
+                       log.Printf("panic during rewrite of %s\n", curv.LongString())
                        // TODO(khr): print source location also
                }
        }()
@@ -19,6 +19,18 @@ func applyRewrite(f *Func, r func(*Value) bool) {
                change := false
                for _, b := range f.Blocks {
                        for _, v := range b.Values {
+                               // elide any copies generated during rewriting
+                               for i, a := range v.Args {
+                                       if a.Op != OpCopy {
+                                               continue
+                                       }
+                                       for a.Op == OpCopy {
+                                               a = a.Args[0]
+                                       }
+                                       v.Args[i] = a
+                               }
+
+                               // apply rewrite function
                                curv = v
                                if r(v) {
                                        change = true
@@ -26,6 +38,7 @@ func applyRewrite(f *Func, r func(*Value) bool) {
                        }
                }
                if !change {
+                       curv = nil
                        return
                }
        }
@@ -52,3 +65,19 @@ func isSigned(t Type) bool {
 func typeSize(t Type) int64 {
        return t.Size()
 }
+
+// addOff adds two offset aux values.  Each must be an int64.  It panics if the sum overflows.
+func addOff(a, b interface{}) interface{} {
+       x := a.(int64)
+       y := b.(int64)
+       z := x + y
+       // x and y have same sign and z has a different sign => overflow
+       if x^y >= 0 && x^z < 0 {
+               log.Panicf("offset overflow %d %d\n", x, y)
+       }
+       return z
+}
+
+func inBounds(idx, len int64) bool {
+       return idx >= 0 && idx < len
+}
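
addOff's overflow check is the standard two's-complement sign trick: a sum
can only wrap when both operands have the same sign, and wrapping flips
the sign of the result, so x^y >= 0 (same input signs) together with
x^z < 0 (result sign differs) catches exactly the overflow cases. The same
predicate in isolation:

        // overflows reports whether x+y wraps around in int64 arithmetic
        // (a standalone restatement of the check inside addOff).
        func overflows(x, y int64) bool {
                z := x + y
                return x^y >= 0 && x^z < 0
        }

For instance, overflows(math.MaxInt64, 1) is true, while overflows(-1, 1)
is false, since mixed-sign sums cannot wrap.
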
index d17449930f4d1e480ac4a7a3fb895e0bdf1cf8b7..c49d9d9f2e0068326cef05455cfd637cf0ed2bd8 100644 (file)
@@ -3,17 +3,22 @@
 // license that can be found in the LICENSE file.
 
 // constant folding
-(Add <t> (Const [c]) (Const [d])) && is64BitInt(t) && isSigned(t) -> (Const [{c.(int64)+d.(int64)}])
-(Add <t> (Const [c]) (Const [d])) && is64BitInt(t) && !isSigned(t) -> (Const [{c.(uint64)+d.(uint64)}])
+(Add <t> (Const [c]) (Const [d])) && is64BitInt(t) -> (Const [{c.(int64)+d.(int64)}])
+(Mul <t> (Const [c]) (Const [d])) && is64BitInt(t) -> (Const [{c.(int64)*d.(int64)}])
+(IsInBounds (Const [c]) (Const [d])) -> (Const [inBounds(c.(int64),d.(int64))])
 
 // tear apart slices
 // TODO: anything that generates a slice needs to go in here.
 (SlicePtr (Load ptr mem)) -> (Load ptr mem)
-(SliceLen (Load ptr mem)) -> (Load (Add <ptr.Type> ptr (Const <v.Block.Func.Config.UIntPtr> [int64(v.Block.Func.Config.ptrSize)])) mem)
-(SliceCap (Load ptr mem)) -> (Load (Add <ptr.Type> ptr (Const <v.Block.Func.Config.UIntPtr> [int64(v.Block.Func.Config.ptrSize*2)])) mem)
-
-// expand array indexing
-// others?  Depends on what is already done by frontend
+(SliceLen (Load ptr mem)) -> (Load (Add <ptr.Type> ptr (Const <v.Block.Func.Config.Uintptr> [int64(v.Block.Func.Config.ptrSize)])) mem)
+(SliceCap (Load ptr mem)) -> (Load (Add <ptr.Type> ptr (Const <v.Block.Func.Config.Uintptr> [int64(v.Block.Func.Config.ptrSize*2)])) mem)
 
+// indexing operations
 // Note: bounds check has already been done
-(SliceIndex s i mem) -> (Load (Add <s.Type.Elem().PtrTo()> (SlicePtr <s.Type.Elem().PtrTo()> s) (Mul <v.Block.Func.Config.UIntPtr> i (Const <v.Block.Func.Config.UIntPtr> [s.Type.Elem().Size()]))) mem)
+(ArrayIndex (Load ptr mem) idx) -> (Load (PtrIndex <ptr.Type.Elem().Elem().PtrTo()> ptr idx) mem)
+(PtrIndex <t> ptr idx) -> (Add ptr (Mul <v.Block.Func.Config.Uintptr> idx (Const <v.Block.Func.Config.Uintptr> [t.Elem().Size()])))
+// TODO: hopefully this will get rid of all full-width array copies.
+
+// big-object moves
+// TODO: fix size
+(Store dst (Load <t> src mem) mem) && t.Size() > 8 -> (Move [t.Size()] dst src mem)
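
Rule syntax, for reference: each line reads (match) && cond -> (result),
with aux values in [brackets] and types in <angle brackets>; variables
bound on the match side are usable in the condition and the result. The
new Add folding rule, for example, corresponds roughly to generated code
of the following shape (a sketch in the style of lowerAmd64.go above, not
the generator's exact output):

        // match: (Add <t> (Const [c]) (Const [d]))
        // cond: is64BitInt(t)
        // result: (Const [{c.(int64)+d.(int64)}])
        if v.Op == OpAdd && v.Args[0].Op == OpConst && v.Args[1].Op == OpConst {
                t := v.Type
                c := v.Args[0].Aux
                d := v.Args[1].Aux
                if is64BitInt(t) {
                        v.Op = OpConst
                        v.Aux = c.(int64) + d.(int64)
                        v.resetArgs()
                        return true
                }
        }
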
index 55267d684287a18869b86568c85b8c33d1e2bdbd..0fed21e740f5730b257bd708bbdcc0855150fa37 100644 (file)
@@ -30,6 +30,7 @@
 
 (Less x y) && is64BitInt(v.Args[0].Type) && isSigned(v.Args[0].Type) -> (SETL (CMPQ <TypeFlags> x y))
 
+(Load <t> ptr mem) && t.IsBoolean() -> (MOVBload [int64(0)] ptr mem)
 (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVQload [int64(0)] ptr mem)
 (Store ptr val mem) && (is64BitInt(val.Type) || isPtr(val.Type)) -> (MOVQstore [int64(0)] ptr val mem)
 
 (IsNonNil p) -> (SETNE (TESTQ <TypeFlags> p p))
 (IsInBounds idx len) -> (SETB (CMPQ <TypeFlags> idx len))
 
+(Move [size] dst src mem) -> (REPMOVSB dst src (Const <TypeUInt64> [size.(int64)]) mem)
+
+(OffPtr [off] ptr) -> (ADDCQ [off] ptr)
+
+(Const <t> [val]) && is64BitInt(t) -> (MOVQconst [val])
+
 // Rules below here apply some simple optimizations after lowering.
 // TODO: Should this be a separate pass?
 
-// stack loads/stores
-(MOVQload [off1] (FPAddr [off2]) mem) -> (MOVQloadFP [off1.(int64)+off2.(int64)] mem)
-(MOVQload [off1] (SPAddr [off2]) mem) -> (MOVQloadSP [off1.(int64)+off2.(int64)] mem)
-(MOVQstore [off1] (FPAddr [off2]) val mem) -> (MOVQstoreFP [off1.(int64)+off2.(int64)] val mem)
-(MOVQstore [off1] (SPAddr [off2]) val mem) -> (MOVQstoreSP [off1.(int64)+off2.(int64)] val mem)
-
 // global loads/stores
-(MOVQload [off] (Global [sym]) mem) -> (MOVQloadglobal [GlobalOffset{sym,off.(int64)}] mem)
-(MOVQstore [off] (Global [sym]) val mem) -> (MOVQstoreglobal [GlobalOffset{sym,off.(int64)}] val mem)
+(Global [sym]) -> (LEAQglobal [GlobalOffset{sym,0}])
 
 // fold constants into instructions
-(ADDQ x (Const [c])) -> (ADDCQ [c] x) // TODO: restrict c to int32 range?
-(ADDQ (Const [c]) x) -> (ADDCQ [c] x)
-(SUBQ x (Const [c])) -> (SUBCQ x [c])
-(SUBQ <t> (Const [c]) x) -> (NEGQ (SUBCQ <t> x [c]))
-(MULQ x (Const [c])) -> (MULCQ [c] x)
-(MULQ (Const [c]) x) -> (MULCQ [c] x)
-(CMPQ x (Const [c])) -> (CMPCQ x [c])
-(CMPQ (Const [c]) x) -> (InvertFlags (CMPCQ <TypeFlags> x [c]))
+(ADDQ x (MOVQconst [c])) -> (ADDCQ [c] x) // TODO: restrict c to int32 range?
+(ADDQ (MOVQconst [c]) x) -> (ADDCQ [c] x)
+(SUBQ x (MOVQconst [c])) -> (SUBCQ x [c])
+(SUBQ <t> (MOVQconst [c]) x) -> (NEGQ (SUBCQ <t> x [c]))
+(MULQ x (MOVQconst [c])) -> (MULCQ [c] x)
+(MULQ (MOVQconst [c]) x) -> (MULCQ [c] x)
+(CMPQ x (MOVQconst [c])) -> (CMPCQ x [c])
+(CMPQ (MOVQconst [c]) x) -> (InvertFlags (CMPCQ <TypeFlags> x [c]))
 
 // strength reduction
 // TODO: do this a lot more generically
@@ -66,7 +66,7 @@
 
 // fold add/shift into leaq
 (ADDQ x (SHLCQ [shift] y)) && shift.(int64) == 3 -> (LEAQ8 [int64(0)] x y)
-(ADDCQ [c] (LEAQ8 [d] x y)) -> (LEAQ8 [c.(int64)+d.(int64)] x y)
+(ADDCQ [c] (LEAQ8 [d] x y)) -> (LEAQ8 [addOff(c, d)] x y)
 
 // reverse ordering of compare instruction
 (SETL (InvertFlags x)) -> (SETGE x)
 // the ADDCQ get eliminated, we still have to compute the ADDCQ and we now
 // have potentially two live values (ptr and (ADDCQ [off] ptr)) instead of one.
 // Nevertheless, let's do it!
-(MOVQload [off1] (ADDCQ [off2] ptr) mem) -> (MOVQload [off1.(int64)+off2.(int64)] ptr mem)
-(MOVQstore [off1] (ADDCQ [off2] ptr) val mem) -> (MOVQstore [off1.(int64)+off2.(int64)] ptr val mem)
+(MOVQload [off1] (ADDCQ [off2] ptr) mem) -> (MOVQload [addOff(off1, off2)] ptr mem)
+(MOVQstore [off1] (ADDCQ [off2] ptr) val mem) -> (MOVQstore [addOff(off1, off2)] ptr val mem)
 
 // indexed loads and stores
-(MOVQload [off1] (LEAQ8 [off2] ptr idx) mem) -> (MOVQloadidx8 [off1.(int64)+off2.(int64)] ptr idx mem)
-(MOVQstore [off1] (LEAQ8 [off2] ptr idx) val mem) -> (MOVQstoreidx8 [off1.(int64)+off2.(int64)] ptr idx val mem)
+(MOVQload [off1] (LEAQ8 [off2] ptr idx) mem) -> (MOVQloadidx8 [addOff(off1, off2)] ptr idx mem)
+(MOVQstore [off1] (LEAQ8 [off2] ptr idx) val mem) -> (MOVQstoreidx8 [addOff(off1, off2)] ptr idx val mem)
+
+(MOVQloadidx8 [off1] (ADDCQ [off2] ptr) idx mem) -> (MOVQloadidx8 [addOff(off1, off2)] ptr idx mem)
+(MOVQstoreidx8 [off1] (ADDCQ [off2] ptr) idx val mem) -> (MOVQstoreidx8 [addOff(off1, off2)] ptr idx val mem)
 
-// Combine the offset of a stack object with the offset within a stack object
-(ADDCQ [off1] (FPAddr [off2])) -> (FPAddr [off1.(int64)+off2.(int64)])
-(ADDCQ [off1] (SPAddr [off2])) -> (SPAddr [off1.(int64)+off2.(int64)])
+(ADDCQ [off] x) && off.(int64) == 0 -> (Copy x)
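
The final rule turns a zero-offset ADDCQ into a Copy rather than deleting
the value outright; the copy-elision loop added to applyRewrite in
rewrite.go then splices such copies out of every argument list. The net
effect, schematically:

        v1 = ADDCQ [0] ptr   // after the rule:  v1 = Copy ptr
        v2 = MOVQload [8] v1 // after elision:   v2 = MOVQload [8] ptr

which leaves v1 unused and removable by dead-code elimination.
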
index 31f46f7cce26c4dc7c24dd878e542f622e0f778c..4ac930298b2f33a967603cae6709520a81c3754c 100644 (file)
@@ -245,6 +245,12 @@ func genResult(w io.Writer, result string) {
 func genResult0(w io.Writer, result string, alloc *int, top bool) string {
        if result[0] != '(' {
                // variable
+               if top {
+                       fmt.Fprintf(w, "v.Op = %s.Op\n", result)
+                       fmt.Fprintf(w, "v.Aux = %s.Aux\n", result)
+                       fmt.Fprintf(w, "v.resetArgs()\n")
+                       fmt.Fprintf(w, "v.AddArgs(%s.Args...)\n", result)
+               }
                return result
        }
 
@@ -297,20 +303,33 @@ func split(s string) []string {
 
 outer:
        for s != "" {
-               d := 0         // depth of ({[<
-               nonsp := false // found a non-space char so far
+               d := 0               // depth of ({[<
+               var open, close byte // opening and closing markers ({[< or )}]>
+               nonsp := false       // found a non-space char so far
                for i := 0; i < len(s); i++ {
-                       switch s[i] {
-                       case '(', '{', '[', '<':
+                       switch {
+                       case d == 0 && s[i] == '(':
+                               open, close = '(', ')'
                                d++
-                       case ')', '}', ']', '>':
-                               d--
-                       case ' ', '\t':
-                               if d == 0 && nonsp {
+                       case d == 0 && s[i] == '<':
+                               open, close = '<', '>'
+                               d++
+                       case d == 0 && s[i] == '[':
+                               open, close = '[', ']'
+                               d++
+                       case d == 0 && s[i] == '{':
+                               open, close = '{', '}'
+                               d++
+                       case d == 0 && (s[i] == ' ' || s[i] == '\t'):
+                               if nonsp {
                                        r = append(r, strings.TrimSpace(s[:i]))
                                        s = s[i:]
                                        continue outer
                                }
+                       case d > 0 && s[i] == open:
+                               d++
+                       case d > 0 && s[i] == close:
+                               d--
                        default:
                                nonsp = true
                        }
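
The reworked split records which marker kind opened the current group, so
only the matching closer decrements the depth; a '>' inside an aux
expression, say, is now an ordinary character. Hypothetical inputs and
outputs:

        split("x (SHLCQ [shift] y)") // ["x", "(SHLCQ [shift] y)"]
        split("[c.(int64)>>3] x")    // ["[c.(int64)>>3]", "x"]; the old code
                                     // would have treated each '>' as a closer
                                     // and lost track of the nesting depth
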
index 4d0359ed8121e5325edd13dcaae51f9726ca7990..8a315e10458f19b4b0d15e9faa083ad04ee82064 100644 (file)
@@ -15,6 +15,9 @@ func stackalloc(f *Func) {
                        if v.Op != OpPhi {
                                continue
                        }
+                       if v.Type.IsMemory() { // TODO: only "regallocable" types
+                               continue
+                       }
                        n += v.Type.Size()
                        // a := v.Type.Align()
                        // n = (n + a - 1) / a * a  TODO
@@ -35,10 +38,11 @@ func stackalloc(f *Func) {
                        if v.Type.IsMemory() { // TODO: only "regallocable" types
                                continue
                        }
-                       if v.Op == OpConst {
-                               // don't allocate space for OpConsts.  They should
-                               // have been rematerialized everywhere.
-                               // TODO: is this the right thing to do?
+                       if len(v.Args) == 0 {
+                               // v will have been materialized wherever it is needed.
+                               continue
+                       }
+                       if len(v.Args) == 1 && (v.Args[0].Op == OpFP || v.Args[0].Op == OpSP || v.Args[0].Op == OpGlobal) {
                                continue
                        }
                        // a := v.Type.Align()