From cfc2aa56b0bf6b7dfb8f38cd2cfbe8799fc5a31a Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Mon, 18 May 2015 16:44:20 -0700 Subject: [PATCH] [dev.ssa] cmd/internal/ssa: Handle more instructions + some cleanup Add & as an input op. Add several output ops (loads & stores, TESTB, LEAQglobal, branches, memcopy) Some other small things: - Add exprAddr to builder to generate addresses of expressions. Use it in various places that had ad-hoc code. - Separate out nil & bounds check generation to separate functions. - Add explicit FP and SP ops so we dont need specialized *FP and *SP opcodes. - Fix fallthrough at end of functions with no return values. - rematerialization of more opcodes. Change-Id: I781decfcef9770fb15f0cd6b061547f7824a2d5e Reviewed-on: https://go-review.googlesource.com/10213 Reviewed-by: Alan Donovan --- src/cmd/internal/gc/ssa.go | 371 +++++++++++----- src/cmd/internal/obj/x86/6.out.go | 32 +- src/cmd/internal/ssa/check.go | 6 +- src/cmd/internal/ssa/config.go | 6 +- src/cmd/internal/ssa/generic.go | 189 +++++--- src/cmd/internal/ssa/lower.go | 115 +++-- src/cmd/internal/ssa/lowerAmd64.go | 408 +++++++++--------- src/cmd/internal/ssa/op.go | 19 +- src/cmd/internal/ssa/op_string.go | 12 +- src/cmd/internal/ssa/opamd64.go | 57 +-- src/cmd/internal/ssa/regalloc.go | 43 +- src/cmd/internal/ssa/rewrite.go | 33 +- src/cmd/internal/ssa/rulegen/generic.rules | 21 +- .../internal/ssa/rulegen/lower_amd64.rules | 49 +-- src/cmd/internal/ssa/rulegen/rulegen.go | 35 +- src/cmd/internal/ssa/stackalloc.go | 12 +- 16 files changed, 905 insertions(+), 503 deletions(-) diff --git a/src/cmd/internal/gc/ssa.go b/src/cmd/internal/gc/ssa.go index ec6ad8abcb..8e81163ad4 100644 --- a/src/cmd/internal/gc/ssa.go +++ b/src/cmd/internal/gc/ssa.go @@ -15,7 +15,7 @@ import ( func buildssa(fn *Node) *ssa.Func { dumplist("buildssa", Curfn.Nbody) - var s ssaState + var s state // TODO(khr): build config just once at the start of the compiler binary s.config = ssa.NewConfig(Thearch.Thestring) @@ -33,8 +33,10 @@ func buildssa(fn *Node) *ssa.Func { // Allocate exit block s.exit = s.f.NewBlock(ssa.BlockExit) - // TODO(khr): all args. Make a struct containing args/returnvals, declare - // an FP which contains a pointer to that struct. + // Allocate starting values + s.startmem = s.f.Entry.NewValue(ssa.OpArg, ssa.TypeMem, ".mem") + s.fp = s.f.Entry.NewValue(ssa.OpFP, s.config.Uintptr, nil) // TODO: use generic pointer type (unsafe.Pointer?) instead + s.sp = s.f.Entry.NewValue(ssa.OpSP, s.config.Uintptr, nil) s.vars = map[string]*ssa.Value{} s.labels = map[string]*ssa.Block{} @@ -44,6 +46,11 @@ func buildssa(fn *Node) *ssa.Func { s.startBlock(s.f.Entry) s.stmtList(fn.Nbody) + // fallthrough to exit + if b := s.endBlock(); b != nil { + addEdge(b, s.exit) + } + // Finish up exit block s.startBlock(s.exit) s.exit.Control = s.mem() @@ -58,7 +65,7 @@ func buildssa(fn *Node) *ssa.Func { return s.f } -type ssaState struct { +type state struct { // configuration (arch) information config *ssa.Config @@ -83,10 +90,18 @@ type ssaState struct { // offsets of argument slots // unnamed and unused args are not listed. argOffsets map[string]int64 + + // starting values. Memory, frame pointer, and stack pointer + startmem *ssa.Value + fp *ssa.Value + sp *ssa.Value } // startBlock sets the current block we're generating code in to b. 
-func (s *ssaState) startBlock(b *ssa.Block) { +func (s *state) startBlock(b *ssa.Block) { + if s.curBlock != nil { + log.Fatalf("starting block %v when block %v has not ended", b, s.curBlock) + } s.curBlock = b s.vars = map[string]*ssa.Value{} } @@ -94,7 +109,7 @@ func (s *ssaState) startBlock(b *ssa.Block) { // endBlock marks the end of generating code for the current block. // Returns the (former) current block. Returns nil if there is no current // block, i.e. if no code flows to the current execution point. -func (s *ssaState) endBlock() *ssa.Block { +func (s *state) endBlock() *ssa.Block { b := s.curBlock if b == nil { return nil @@ -109,14 +124,14 @@ func (s *ssaState) endBlock() *ssa.Block { } // ssaStmtList converts the statement n to SSA and adds it to s. -func (s *ssaState) stmtList(l *NodeList) { +func (s *state) stmtList(l *NodeList) { for ; l != nil; l = l.Next { s.stmt(l.N) } } // ssaStmt converts the statement n to SSA and adds it to s. -func (s *ssaState) stmt(n *Node) { +func (s *state) stmt(n *Node) { s.stmtList(n.Ninit) switch n.Op { @@ -145,35 +160,15 @@ func (s *ssaState) stmt(n *Node) { case OAS: // TODO(khr): colas? val := s.expr(n.Right) - if n.Left.Op == OINDREG { - // indirect off a register (TODO: always SP?) - // used for storing arguments to callees - addr := s.f.Entry.NewValue(ssa.OpSPAddr, Ptrto(n.Right.Type), n.Left.Xoffset) - s.vars[".mem"] = s.curBlock.NewValue3(ssa.OpStore, ssa.TypeMem, nil, addr, val, s.mem()) - } else if n.Left.Op != ONAME { - // some more complicated expression. Rewrite to a store. TODO - addr := s.expr(n.Left) // TODO: wrap in & - - // TODO(khr): nil check - s.vars[".mem"] = s.curBlock.NewValue3(ssa.OpStore, n.Right.Type, nil, addr, val, s.mem()) - } else if !n.Left.Addable { - // TODO - log.Fatalf("assignment to non-addable value") - } else if n.Left.Class&PHEAP != 0 { - // TODO - log.Fatalf("assignment to heap value") - } else if n.Left.Class == PEXTERN { - // assign to global variable - addr := s.f.Entry.NewValue(ssa.OpGlobal, Ptrto(n.Left.Type), n.Left.Sym) - s.vars[".mem"] = s.curBlock.NewValue3(ssa.OpStore, ssa.TypeMem, nil, addr, val, s.mem()) - } else if n.Left.Class == PPARAMOUT { - // store to parameter slot - addr := s.f.Entry.NewValue(ssa.OpFPAddr, Ptrto(n.Right.Type), n.Left.Xoffset) - s.vars[".mem"] = s.curBlock.NewValue3(ssa.OpStore, ssa.TypeMem, nil, addr, val, s.mem()) - } else { - // normal variable + if n.Left.Op == ONAME && !n.Left.Addrtaken && n.Left.Class&PHEAP == 0 && n.Left.Class != PEXTERN && n.Left.Class != PPARAMOUT { + // ssa-able variable. s.vars[n.Left.Sym.Name] = val + return } + // not ssa-able. Treat as a store. + addr := s.addr(n.Left) + s.vars[".mem"] = s.curBlock.NewValue3(ssa.OpStore, ssa.TypeMem, nil, addr, val, s.mem()) + // TODO: try to make more variables registerizeable. case OIF: cond := s.expr(n.Ntest) b := s.endBlock() @@ -254,7 +249,7 @@ func (s *ssaState) stmt(n *Node) { } // expr converts the expression n to ssa, adds it to s and returns the ssa result. -func (s *ssaState) expr(n *Node) *ssa.Value { +func (s *state) expr(n *Node) *ssa.Value { if n == nil { // TODO(khr): is this nil??? 
return s.f.Entry.NewValue(ssa.OpConst, n.Type, nil) @@ -269,7 +264,6 @@ func (s *ssaState) expr(n *Node) *ssa.Value { } s.argOffsets[n.Sym.Name] = n.Xoffset return s.variable(n.Sym.Name, n.Type) - // binary ops case OLITERAL: switch n.Val.Ctype { case CTINT: @@ -278,6 +272,8 @@ func (s *ssaState) expr(n *Node) *ssa.Value { log.Fatalf("unhandled OLITERAL %v", n.Val.Ctype) return nil } + + // binary ops case OLT: a := s.expr(n.Left) b := s.expr(n.Right) @@ -286,56 +282,36 @@ func (s *ssaState) expr(n *Node) *ssa.Value { a := s.expr(n.Left) b := s.expr(n.Right) return s.curBlock.NewValue2(ssa.OpAdd, a.Type, nil, a, b) - case OSUB: // TODO:(khr) fold code for all binary ops together somehow a := s.expr(n.Left) b := s.expr(n.Right) return s.curBlock.NewValue2(ssa.OpSub, a.Type, nil, a, b) + case OADDR: + return s.addr(n.Left) + case OIND: p := s.expr(n.Left) - c := s.curBlock.NewValue1(ssa.OpIsNonNil, ssa.TypeBool, nil, p) - b := s.endBlock() - b.Kind = ssa.BlockIf - b.Control = c - bNext := s.f.NewBlock(ssa.BlockPlain) - addEdge(b, bNext) - addEdge(b, s.exit) - s.startBlock(bNext) - // TODO(khr): if ptr check fails, don't go directly to exit. - // Instead, go to a call to panicnil or something. - // TODO: implicit nil checks somehow? - + s.nilCheck(p) return s.curBlock.NewValue2(ssa.OpLoad, n.Type, nil, p, s.mem()) + case ODOTPTR: p := s.expr(n.Left) - // TODO: nilcheck - p = s.curBlock.NewValue2(ssa.OpAdd, p.Type, nil, p, s.f.ConstInt(s.config.UIntPtr, n.Xoffset)) + s.nilCheck(p) + p = s.curBlock.NewValue2(ssa.OpAdd, p.Type, nil, p, s.f.ConstInt(s.config.Uintptr, n.Xoffset)) return s.curBlock.NewValue2(ssa.OpLoad, n.Type, nil, p, s.mem()) case OINDEX: - // TODO: slice vs array? Map index is already reduced to a function call - a := s.expr(n.Left) - i := s.expr(n.Right) - // convert index to full width - // TODO: if index is 64-bit and we're compiling to 32-bit, check that high - // 32 bits are zero (and use a low32 op instead of convnop here). - i = s.curBlock.NewValue1(ssa.OpConvNop, s.config.UIntPtr, nil, i) - - // bounds check - len := s.curBlock.NewValue1(ssa.OpSliceLen, s.config.UIntPtr, nil, a) - cmp := s.curBlock.NewValue2(ssa.OpIsInBounds, ssa.TypeBool, nil, i, len) - b := s.endBlock() - b.Kind = ssa.BlockIf - b.Control = cmp - bNext := s.f.NewBlock(ssa.BlockPlain) - addEdge(b, bNext) - addEdge(b, s.exit) - s.startBlock(bNext) - // TODO: don't go directly to s.exit. Go to a stub that calls panicindex first. - - return s.curBlock.NewValue3(ssa.OpSliceIndex, n.Left.Type.Type, nil, a, i, s.mem()) + if n.Left.Type.Bound >= 0 { // array + a := s.expr(n.Left) + i := s.expr(n.Right) + s.boundsCheck(i, s.f.ConstInt(s.config.Uintptr, n.Left.Type.Bound)) + return s.curBlock.NewValue2(ssa.OpArrayIndex, n.Left.Type.Type, nil, a, i) + } else { // slice + p := s.addr(n) + return s.curBlock.NewValue2(ssa.OpLoad, n.Left.Type.Type, nil, p, s.mem()) + } case OCALLFUNC: // run all argument assignments @@ -359,7 +335,7 @@ func (s *ssaState) expr(n *Node) *ssa.Value { s.startBlock(bNext) var titer Iter fp := Structfirst(&titer, Getoutarg(n.Left.Type)) - a := s.f.Entry.NewValue(ssa.OpSPAddr, Ptrto(fp.Type), fp.Width) + a := s.f.Entry.NewValue1(ssa.OpOffPtr, Ptrto(fp.Type), fp.Width, s.sp) return s.curBlock.NewValue2(ssa.OpLoad, fp.Type, nil, a, call) default: log.Fatalf("unhandled expr %s", opnames[n.Op]) @@ -367,8 +343,81 @@ func (s *ssaState) expr(n *Node) *ssa.Value { } } +// expr converts the address of the expression n to SSA, adds it to s and returns the SSA result. 
+func (s *state) addr(n *Node) *ssa.Value { + switch n.Op { + case ONAME: + if n.Class == PEXTERN { + // global variable + return s.f.Entry.NewValue(ssa.OpGlobal, Ptrto(n.Type), n.Sym) + } + if n.Class == PPARAMOUT { + // store to parameter slot + return s.f.Entry.NewValue1(ssa.OpOffPtr, Ptrto(n.Type), n.Xoffset, s.fp) + } + // TODO: address of locals + log.Fatalf("variable address of %v not implemented", n) + return nil + case OINDREG: + // indirect off a register (TODO: always SP?) + // used for storing/loading arguments/returns to/from callees + return s.f.Entry.NewValue1(ssa.OpOffPtr, Ptrto(n.Type), n.Xoffset, s.sp) + case OINDEX: + if n.Left.Type.Bound >= 0 { // array + a := s.addr(n.Left) + i := s.expr(n.Right) + len := s.f.ConstInt(s.config.Uintptr, n.Left.Type.Bound) + s.boundsCheck(i, len) + return s.curBlock.NewValue2(ssa.OpPtrIndex, Ptrto(n.Left.Type.Type), nil, a, i) + } else { // slice + a := s.expr(n.Left) + i := s.expr(n.Right) + len := s.curBlock.NewValue1(ssa.OpSliceLen, s.config.Uintptr, nil, a) + s.boundsCheck(i, len) + p := s.curBlock.NewValue1(ssa.OpSlicePtr, Ptrto(n.Left.Type.Type), nil, a) + return s.curBlock.NewValue2(ssa.OpPtrIndex, Ptrto(n.Left.Type.Type), nil, p, i) + } + default: + log.Fatalf("addr: bad op %v", n.Op) + return nil + } +} + +// nilCheck generates nil pointer checking code. +// Starts a new block on return. +func (s *state) nilCheck(ptr *ssa.Value) { + c := s.curBlock.NewValue1(ssa.OpIsNonNil, ssa.TypeBool, nil, ptr) + b := s.endBlock() + b.Kind = ssa.BlockIf + b.Control = c + bNext := s.f.NewBlock(ssa.BlockPlain) + addEdge(b, bNext) + addEdge(b, s.exit) + s.startBlock(bNext) + // TODO(khr): Don't go directly to exit. Go to a stub that calls panicmem first. + // TODO: implicit nil checks somehow? +} + +// boundsCheck generates bounds checking code. Checks if 0 <= idx < len, branches to exit if not. +// Starts a new block on return. +func (s *state) boundsCheck(idx, len *ssa.Value) { + // TODO: convert index to full width? + // TODO: if index is 64-bit and we're compiling to 32-bit, check that high 32 bits are zero. + + // bounds check + cmp := s.curBlock.NewValue2(ssa.OpIsInBounds, ssa.TypeBool, nil, idx, len) + b := s.endBlock() + b.Kind = ssa.BlockIf + b.Control = cmp + bNext := s.f.NewBlock(ssa.BlockPlain) + addEdge(b, bNext) + addEdge(b, s.exit) + // TODO: don't go directly to s.exit. Go to a stub that calls panicindex first. + s.startBlock(bNext) +} + // variable returns the value of a variable at the current location. -func (s *ssaState) variable(name string, t ssa.Type) *ssa.Value { +func (s *state) variable(name string, t ssa.Type) *ssa.Value { if s.curBlock == nil { log.Fatalf("nil curblock!") } @@ -381,11 +430,11 @@ func (s *ssaState) variable(name string, t ssa.Type) *ssa.Value { return v } -func (s *ssaState) mem() *ssa.Value { +func (s *state) mem() *ssa.Value { return s.variable(".mem", ssa.TypeMem) } -func (s *ssaState) linkForwardReferences() { +func (s *state) linkForwardReferences() { // Build ssa graph. Each variable on its first use in a basic block // leaves a FwdRef in that block representing the incoming value // of that variable. This function links that ref up with possible definitions, @@ -406,17 +455,16 @@ func (s *ssaState) linkForwardReferences() { } // lookupVarIncoming finds the variable's value at the start of block b. 
-func (s *ssaState) lookupVarIncoming(b *ssa.Block, t ssa.Type, name string) *ssa.Value { +func (s *state) lookupVarIncoming(b *ssa.Block, t ssa.Type, name string) *ssa.Value { // TODO(khr): have lookupVarIncoming overwrite the fwdRef or copy it // will be used in, instead of having the result used in a copy value. if b == s.f.Entry { if name == ".mem" { - return b.NewValue(ssa.OpArg, t, name) + return s.startmem } // variable is live at the entry block. Load it. - a := s.f.Entry.NewValue(ssa.OpFPAddr, Ptrto(t.(*Type)), s.argOffsets[name]) - m := b.NewValue(ssa.OpArg, ssa.TypeMem, ".mem") // TODO: reuse mem starting value - return b.NewValue2(ssa.OpLoad, t, nil, a, m) + addr := s.f.Entry.NewValue1(ssa.OpOffPtr, Ptrto(t.(*Type)), s.argOffsets[name], s.fp) + return b.NewValue2(ssa.OpLoad, t, nil, addr, s.startmem) } var vals []*ssa.Value for _, p := range b.Preds { @@ -435,7 +483,7 @@ func (s *ssaState) lookupVarIncoming(b *ssa.Block, t ssa.Type, name string) *ssa } // lookupVarOutgoing finds the variable's value at the end of block b. -func (s *ssaState) lookupVarOutgoing(b *ssa.Block, t ssa.Type, name string) *ssa.Value { +func (s *state) lookupVarOutgoing(b *ssa.Block, t ssa.Type, name string) *ssa.Value { m := s.defvars[b.ID] if v, ok := m[name]; ok { return v @@ -568,13 +616,23 @@ func genValue(v *ssa.Value, frameSize int64) { p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpCMPQ: - x := regnum(v.Args[0]) - y := regnum(v.Args[1]) p := Prog(x86.ACMPQ) p.From.Type = obj.TYPE_REG - p.From.Reg = x + p.From.Reg = regnum(v.Args[0]) + p.To.Type = obj.TYPE_REG + p.To.Reg = regnum(v.Args[1]) + case ssa.OpCMPCQ: + p := Prog(x86.ACMPQ) + p.From.Type = obj.TYPE_REG + p.From.Reg = regnum(v.Args[0]) + p.To.Type = obj.TYPE_CONST + p.To.Offset = v.Aux.(int64) + case ssa.OpTESTB: + p := Prog(x86.ATESTB) + p.From.Type = obj.TYPE_REG + p.From.Reg = regnum(v.Args[0]) p.To.Type = obj.TYPE_REG - p.To.Reg = y + p.To.Reg = regnum(v.Args[1]) case ssa.OpMOVQconst: x := regnum(v) p := Prog(x86.AMOVQ) @@ -582,22 +640,57 @@ func genValue(v *ssa.Value, frameSize int64) { p.From.Offset = v.Aux.(int64) p.To.Type = obj.TYPE_REG p.To.Reg = x - case ssa.OpMOVQloadFP: - x := regnum(v) + case ssa.OpMOVQload: p := Prog(x86.AMOVQ) p.From.Type = obj.TYPE_MEM - p.From.Reg = x86.REG_SP - p.From.Offset = v.Aux.(int64) + frameSize + if v.Block.Func.RegAlloc[v.Args[0].ID].Name() == "FP" { + // TODO: do the fp/sp adjustment somewhere else? 
+ p.From.Reg = x86.REG_SP + p.From.Offset = v.Aux.(int64) + frameSize + } else { + p.From.Reg = regnum(v.Args[0]) + p.From.Offset = v.Aux.(int64) + } p.To.Type = obj.TYPE_REG - p.To.Reg = x - case ssa.OpMOVQstoreFP: - x := regnum(v.Args[0]) + p.To.Reg = regnum(v) + case ssa.OpMOVBload: + p := Prog(x86.AMOVB) + p.From.Type = obj.TYPE_MEM + if v.Block.Func.RegAlloc[v.Args[0].ID].Name() == "FP" { + p.From.Reg = x86.REG_SP + p.From.Offset = v.Aux.(int64) + frameSize + } else { + p.From.Reg = regnum(v.Args[0]) + p.From.Offset = v.Aux.(int64) + } + p.To.Type = obj.TYPE_REG + p.To.Reg = regnum(v) + case ssa.OpMOVQloadidx8: + p := Prog(x86.AMOVQ) + p.From.Type = obj.TYPE_MEM + if v.Block.Func.RegAlloc[v.Args[0].ID].Name() == "FP" { + p.From.Reg = x86.REG_SP + p.From.Offset = v.Aux.(int64) + frameSize + } else { + p.From.Reg = regnum(v.Args[0]) + p.From.Offset = v.Aux.(int64) + } + p.From.Scale = 8 + p.From.Index = regnum(v.Args[1]) + p.To.Type = obj.TYPE_REG + p.To.Reg = regnum(v) + case ssa.OpMOVQstore: p := Prog(x86.AMOVQ) p.From.Type = obj.TYPE_REG - p.From.Reg = x + p.From.Reg = regnum(v.Args[1]) p.To.Type = obj.TYPE_MEM - p.To.Reg = x86.REG_SP - p.To.Offset = v.Aux.(int64) + frameSize + if v.Block.Func.RegAlloc[v.Args[0].ID].Name() == "FP" { + p.To.Reg = x86.REG_SP + p.To.Offset = v.Aux.(int64) + frameSize + } else { + p.To.Reg = regnum(v.Args[0]) + p.To.Offset = v.Aux.(int64) + } case ssa.OpCopy: x := regnum(v.Args[0]) y := regnum(v) @@ -638,8 +731,19 @@ func genValue(v *ssa.Value, frameSize int64) { case ssa.OpArg: // memory arg needs no code // TODO: only mem arg goes here. + case ssa.OpLEAQglobal: + g := v.Aux.(ssa.GlobalOffset) + p := Prog(x86.ALEAQ) + p.From.Type = obj.TYPE_MEM + p.From.Name = obj.NAME_EXTERN + p.From.Sym = Linksym(g.Global.(*Sym)) + p.From.Offset = g.Offset + p.To.Type = obj.TYPE_REG + p.To.Reg = regnum(v) + case ssa.OpFP, ssa.OpSP: + // nothing to do default: - log.Fatalf("value %v not implemented yet", v) + log.Fatalf("value %s not implemented yet", v.LongString()) } } @@ -653,6 +757,40 @@ func genBlock(b, next *ssa.Block, branches []branch) []branch { } case ssa.BlockExit: Prog(obj.ARET) + case ssa.BlockEQ: + if b.Succs[0] == next { + p := Prog(x86.AJNE) + p.To.Type = obj.TYPE_BRANCH + branches = append(branches, branch{p, b.Succs[1]}) + } else if b.Succs[1] == next { + p := Prog(x86.AJEQ) + p.To.Type = obj.TYPE_BRANCH + branches = append(branches, branch{p, b.Succs[0]}) + } else { + p := Prog(x86.AJEQ) + p.To.Type = obj.TYPE_BRANCH + branches = append(branches, branch{p, b.Succs[0]}) + q := Prog(obj.AJMP) + q.To.Type = obj.TYPE_BRANCH + branches = append(branches, branch{q, b.Succs[1]}) + } + case ssa.BlockNE: + if b.Succs[0] == next { + p := Prog(x86.AJEQ) + p.To.Type = obj.TYPE_BRANCH + branches = append(branches, branch{p, b.Succs[1]}) + } else if b.Succs[1] == next { + p := Prog(x86.AJNE) + p.To.Type = obj.TYPE_BRANCH + branches = append(branches, branch{p, b.Succs[0]}) + } else { + p := Prog(x86.AJNE) + p.To.Type = obj.TYPE_BRANCH + branches = append(branches, branch{p, b.Succs[0]}) + q := Prog(obj.AJMP) + q.To.Type = obj.TYPE_BRANCH + branches = append(branches, branch{q, b.Succs[1]}) + } case ssa.BlockLT: if b.Succs[0] == next { p := Prog(x86.AJGE) @@ -670,8 +808,43 @@ func genBlock(b, next *ssa.Block, branches []branch) []branch { q.To.Type = obj.TYPE_BRANCH branches = append(branches, branch{q, b.Succs[1]}) } + case ssa.BlockULT: + if b.Succs[0] == next { + p := Prog(x86.AJCC) + p.To.Type = obj.TYPE_BRANCH + branches = append(branches, branch{p, 
b.Succs[1]}) + } else if b.Succs[1] == next { + p := Prog(x86.AJCS) + p.To.Type = obj.TYPE_BRANCH + branches = append(branches, branch{p, b.Succs[0]}) + } else { + p := Prog(x86.AJCS) + p.To.Type = obj.TYPE_BRANCH + branches = append(branches, branch{p, b.Succs[0]}) + q := Prog(obj.AJMP) + q.To.Type = obj.TYPE_BRANCH + branches = append(branches, branch{q, b.Succs[1]}) + } + case ssa.BlockUGT: + if b.Succs[0] == next { + p := Prog(x86.AJLS) + p.To.Type = obj.TYPE_BRANCH + branches = append(branches, branch{p, b.Succs[1]}) + } else if b.Succs[1] == next { + p := Prog(x86.AJHI) + p.To.Type = obj.TYPE_BRANCH + branches = append(branches, branch{p, b.Succs[0]}) + } else { + p := Prog(x86.AJHI) + p.To.Type = obj.TYPE_BRANCH + branches = append(branches, branch{p, b.Succs[0]}) + q := Prog(obj.AJMP) + q.To.Type = obj.TYPE_BRANCH + branches = append(branches, branch{q, b.Succs[1]}) + } + default: - log.Fatalf("branch at %v not implemented yet", b) + log.Fatalf("branch %s not implemented yet", b.LongString()) } return branches } diff --git a/src/cmd/internal/obj/x86/6.out.go b/src/cmd/internal/obj/x86/6.out.go index c7f46e1801..e36cb9e7a3 100644 --- a/src/cmd/internal/obj/x86/6.out.go +++ b/src/cmd/internal/obj/x86/6.out.go @@ -110,23 +110,23 @@ const ( AINTO AIRETL AIRETW - AJCC - AJCS + AJCC // >= unsigned + AJCS // < unsigned AJCXZL - AJEQ - AJGE - AJGT - AJHI - AJLE - AJLS - AJLT - AJMI - AJNE - AJOC - AJOS - AJPC - AJPL - AJPS + AJEQ // == (zero) + AJGE // >= signed + AJGT // > signed + AJHI // > unsigned + AJLE // <= signed + AJLS // <= unsigned + AJLT // < signed + AJMI // sign bit set (negative) + AJNE // != (nonzero) + AJOC // overflow clear + AJOS // overflow set + AJPC // parity clear + AJPL // sign bit clear (positive) + AJPS // parity set ALAHF ALARL ALARW diff --git a/src/cmd/internal/ssa/check.go b/src/cmd/internal/ssa/check.go index 453388a899..667313ad9f 100644 --- a/src/cmd/internal/ssa/check.go +++ b/src/cmd/internal/ssa/check.go @@ -58,7 +58,7 @@ func checkFunc(f *Func) { if b.Control == nil { log.Panicf("exit block %s has no control value", b) } - if b.Control.Type != TypeMem { + if !b.Control.Type.IsMemory() { log.Panicf("exit block %s has non-memory control value %s", b, b.Control.LongString()) } case BlockPlain: @@ -75,7 +75,7 @@ func checkFunc(f *Func) { if b.Control == nil { log.Panicf("if block %s has no control value", b) } - if b.Control.Type != TypeBool { + if !b.Control.Type.IsBoolean() { log.Panicf("if block %s has non-bool control value %s", b, b.Control.LongString()) } case BlockCall: @@ -85,7 +85,7 @@ func checkFunc(f *Func) { if b.Control == nil { log.Panicf("call block %s has no control value", b) } - if b.Control.Type != TypeMem { + if !b.Control.Type.IsMemory() { log.Panicf("call block %s has non-memory control value %s", b, b.Control.LongString()) } if b.Succs[1].Kind != BlockExit { diff --git a/src/cmd/internal/ssa/config.go b/src/cmd/internal/ssa/config.go index 80acda4b23..9f1d2a8593 100644 --- a/src/cmd/internal/ssa/config.go +++ b/src/cmd/internal/ssa/config.go @@ -9,7 +9,7 @@ import "log" type Config struct { arch string // "amd64", etc. ptrSize int64 // 4 or 8 - UIntPtr Type // pointer arithmetic type + Uintptr Type // pointer arithmetic type lower func(*Value) bool // lowering function // TODO: more stuff. Compiler flags of interest, ... 
@@ -30,9 +30,9 @@ func NewConfig(arch string) *Config { } // cache the intptr type in the config - c.UIntPtr = TypeUInt32 + c.Uintptr = TypeUInt32 if c.ptrSize == 8 { - c.UIntPtr = TypeUInt64 + c.Uintptr = TypeUInt64 } return c diff --git a/src/cmd/internal/ssa/generic.go b/src/cmd/internal/ssa/generic.go index 2a96793c61..91f9c17d11 100644 --- a/src/cmd/internal/ssa/generic.go +++ b/src/cmd/internal/ssa/generic.go @@ -6,20 +6,20 @@ func genericRules(v *Value) bool { switch v.Op { case OpAdd: // match: (Add (Const [c]) (Const [d])) - // cond: is64BitInt(t) && isSigned(t) + // cond: is64BitInt(t) // result: (Const [{c.(int64)+d.(int64)}]) { t := v.Type if v.Args[0].Op != OpConst { - goto endc86f5c160a87f6f5ec90b6551ec099d9 + goto end8d047ed0ae9537b840adc79ea82c6e05 } c := v.Args[0].Aux if v.Args[1].Op != OpConst { - goto endc86f5c160a87f6f5ec90b6551ec099d9 + goto end8d047ed0ae9537b840adc79ea82c6e05 } d := v.Args[1].Aux - if !(is64BitInt(t) && isSigned(t)) { - goto endc86f5c160a87f6f5ec90b6551ec099d9 + if !(is64BitInt(t)) { + goto end8d047ed0ae9537b840adc79ea82c6e05 } v.Op = OpConst v.Aux = nil @@ -27,100 +27,141 @@ func genericRules(v *Value) bool { v.Aux = c.(int64) + d.(int64) return true } - goto endc86f5c160a87f6f5ec90b6551ec099d9 - endc86f5c160a87f6f5ec90b6551ec099d9: + goto end8d047ed0ae9537b840adc79ea82c6e05 + end8d047ed0ae9537b840adc79ea82c6e05: ; - // match: (Add (Const [c]) (Const [d])) - // cond: is64BitInt(t) && !isSigned(t) - // result: (Const [{c.(uint64)+d.(uint64)}]) + case OpArrayIndex: + // match: (ArrayIndex (Load ptr mem) idx) + // cond: + // result: (Load (PtrIndex ptr idx) mem) + { + if v.Args[0].Op != OpLoad { + goto end3809f4c52270a76313e4ea26e6f0b753 + } + ptr := v.Args[0].Args[0] + mem := v.Args[0].Args[1] + idx := v.Args[1] + v.Op = OpLoad + v.Aux = nil + v.resetArgs() + v0 := v.Block.NewValue(OpPtrIndex, TypeInvalid, nil) + v0.Type = ptr.Type.Elem().Elem().PtrTo() + v0.AddArg(ptr) + v0.AddArg(idx) + v.AddArg(v0) + v.AddArg(mem) + return true + } + goto end3809f4c52270a76313e4ea26e6f0b753 + end3809f4c52270a76313e4ea26e6f0b753: + ; + case OpIsInBounds: + // match: (IsInBounds (Const [c]) (Const [d])) + // cond: + // result: (Const [inBounds(c.(int64),d.(int64))]) + { + if v.Args[0].Op != OpConst { + goto enddbd1a394d9b71ee64335361b8384865c + } + c := v.Args[0].Aux + if v.Args[1].Op != OpConst { + goto enddbd1a394d9b71ee64335361b8384865c + } + d := v.Args[1].Aux + v.Op = OpConst + v.Aux = nil + v.resetArgs() + v.Aux = inBounds(c.(int64), d.(int64)) + return true + } + goto enddbd1a394d9b71ee64335361b8384865c + enddbd1a394d9b71ee64335361b8384865c: + ; + case OpMul: + // match: (Mul (Const [c]) (Const [d])) + // cond: is64BitInt(t) + // result: (Const [{c.(int64)*d.(int64)}]) { t := v.Type if v.Args[0].Op != OpConst { - goto end8941c2a515c1bd38530b7fd96862bac4 + goto end776610f88cf04f438242d76ed2b14f1c } c := v.Args[0].Aux if v.Args[1].Op != OpConst { - goto end8941c2a515c1bd38530b7fd96862bac4 + goto end776610f88cf04f438242d76ed2b14f1c } d := v.Args[1].Aux - if !(is64BitInt(t) && !isSigned(t)) { - goto end8941c2a515c1bd38530b7fd96862bac4 + if !(is64BitInt(t)) { + goto end776610f88cf04f438242d76ed2b14f1c } v.Op = OpConst v.Aux = nil v.resetArgs() - v.Aux = c.(uint64) + d.(uint64) + v.Aux = c.(int64) * d.(int64) return true } - goto end8941c2a515c1bd38530b7fd96862bac4 - end8941c2a515c1bd38530b7fd96862bac4: + goto end776610f88cf04f438242d76ed2b14f1c + end776610f88cf04f438242d76ed2b14f1c: ; - case OpSliceCap: - // match: (SliceCap (Load ptr mem)) + case OpPtrIndex: + // 
match: (PtrIndex ptr idx) // cond: - // result: (Load (Add ptr (Const [int64(v.Block.Func.Config.ptrSize*2)])) mem) + // result: (Add ptr (Mul idx (Const [t.Elem().Size()]))) { - if v.Args[0].Op != OpLoad { - goto ende03f9b79848867df439b56889bb4e55d - } - ptr := v.Args[0].Args[0] - mem := v.Args[0].Args[1] - v.Op = OpLoad + t := v.Type + ptr := v.Args[0] + idx := v.Args[1] + v.Op = OpAdd v.Aux = nil v.resetArgs() - v0 := v.Block.NewValue(OpAdd, TypeInvalid, nil) - v0.Type = ptr.Type - v0.AddArg(ptr) + v.AddArg(ptr) + v0 := v.Block.NewValue(OpMul, TypeInvalid, nil) + v0.Type = v.Block.Func.Config.Uintptr + v0.AddArg(idx) v1 := v.Block.NewValue(OpConst, TypeInvalid, nil) - v1.Type = v.Block.Func.Config.UIntPtr - v1.Aux = int64(v.Block.Func.Config.ptrSize * 2) + v1.Type = v.Block.Func.Config.Uintptr + v1.Aux = t.Elem().Size() v0.AddArg(v1) v.AddArg(v0) - v.AddArg(mem) return true } - goto ende03f9b79848867df439b56889bb4e55d - ende03f9b79848867df439b56889bb4e55d: + goto end383c68c41e72d22ef00c4b7b0fddcbb8 + end383c68c41e72d22ef00c4b7b0fddcbb8: ; - case OpSliceIndex: - // match: (SliceIndex s i mem) + case OpSliceCap: + // match: (SliceCap (Load ptr mem)) // cond: - // result: (Load (Add (SlicePtr s) (Mul i (Const [s.Type.Elem().Size()]))) mem) + // result: (Load (Add ptr (Const [int64(v.Block.Func.Config.ptrSize*2)])) mem) { - s := v.Args[0] - i := v.Args[1] - mem := v.Args[2] + if v.Args[0].Op != OpLoad { + goto endbf1d4db93c4664ed43be3f73afb4dfa3 + } + ptr := v.Args[0].Args[0] + mem := v.Args[0].Args[1] v.Op = OpLoad v.Aux = nil v.resetArgs() v0 := v.Block.NewValue(OpAdd, TypeInvalid, nil) - v0.Type = s.Type.Elem().PtrTo() - v1 := v.Block.NewValue(OpSlicePtr, TypeInvalid, nil) - v1.Type = s.Type.Elem().PtrTo() - v1.AddArg(s) + v0.Type = ptr.Type + v0.AddArg(ptr) + v1 := v.Block.NewValue(OpConst, TypeInvalid, nil) + v1.Type = v.Block.Func.Config.Uintptr + v1.Aux = int64(v.Block.Func.Config.ptrSize * 2) v0.AddArg(v1) - v2 := v.Block.NewValue(OpMul, TypeInvalid, nil) - v2.Type = v.Block.Func.Config.UIntPtr - v2.AddArg(i) - v3 := v.Block.NewValue(OpConst, TypeInvalid, nil) - v3.Type = v.Block.Func.Config.UIntPtr - v3.Aux = s.Type.Elem().Size() - v2.AddArg(v3) - v0.AddArg(v2) v.AddArg(v0) v.AddArg(mem) return true } - goto end733704831a61760840348f790b3ab045 - end733704831a61760840348f790b3ab045: + goto endbf1d4db93c4664ed43be3f73afb4dfa3 + endbf1d4db93c4664ed43be3f73afb4dfa3: ; case OpSliceLen: // match: (SliceLen (Load ptr mem)) // cond: - // result: (Load (Add ptr (Const [int64(v.Block.Func.Config.ptrSize)])) mem) + // result: (Load (Add ptr (Const [int64(v.Block.Func.Config.ptrSize)])) mem) { if v.Args[0].Op != OpLoad { - goto ende94950a57eca1871c93afdeaadb90223 + goto end9190b1ecbda4c5dd6d3e05d2495fb297 } ptr := v.Args[0].Args[0] mem := v.Args[0].Args[1] @@ -131,15 +172,15 @@ func genericRules(v *Value) bool { v0.Type = ptr.Type v0.AddArg(ptr) v1 := v.Block.NewValue(OpConst, TypeInvalid, nil) - v1.Type = v.Block.Func.Config.UIntPtr + v1.Type = v.Block.Func.Config.Uintptr v1.Aux = int64(v.Block.Func.Config.ptrSize) v0.AddArg(v1) v.AddArg(v0) v.AddArg(mem) return true } - goto ende94950a57eca1871c93afdeaadb90223 - ende94950a57eca1871c93afdeaadb90223: + goto end9190b1ecbda4c5dd6d3e05d2495fb297 + end9190b1ecbda4c5dd6d3e05d2495fb297: ; case OpSlicePtr: // match: (SlicePtr (Load ptr mem)) @@ -160,6 +201,36 @@ func genericRules(v *Value) bool { } goto end459613b83f95b65729d45c2ed663a153 end459613b83f95b65729d45c2ed663a153: + ; + case OpStore: + // match: (Store dst (Load src mem) mem) + // cond: 
t.Size() > 8 + // result: (Move [t.Size()] dst src mem) + { + dst := v.Args[0] + if v.Args[1].Op != OpLoad { + goto end324ffb6d2771808da4267f62c854e9c8 + } + t := v.Args[1].Type + src := v.Args[1].Args[0] + mem := v.Args[1].Args[1] + if v.Args[2] != v.Args[1].Args[1] { + goto end324ffb6d2771808da4267f62c854e9c8 + } + if !(t.Size() > 8) { + goto end324ffb6d2771808da4267f62c854e9c8 + } + v.Op = OpMove + v.Aux = nil + v.resetArgs() + v.Aux = t.Size() + v.AddArg(dst) + v.AddArg(src) + v.AddArg(mem) + return true + } + goto end324ffb6d2771808da4267f62c854e9c8 + end324ffb6d2771808da4267f62c854e9c8: } return false } diff --git a/src/cmd/internal/ssa/lower.go b/src/cmd/internal/ssa/lower.go index 82e5d23241..84379c00de 100644 --- a/src/cmd/internal/ssa/lower.go +++ b/src/cmd/internal/ssa/lower.go @@ -16,41 +16,88 @@ func lower(f *Func) { // additional pass for 386/amd64, link condition codes directly to blocks // TODO: do generically somehow? Special "block" rewrite rules? for _, b := range f.Blocks { - switch b.Kind { - case BlockIf: - switch b.Control.Op { - case OpSETL: - b.Kind = BlockLT - b.Control = b.Control.Args[0] - case OpSETNE: - b.Kind = BlockNE - b.Control = b.Control.Args[0] - case OpSETB: - b.Kind = BlockULT - b.Control = b.Control.Args[0] - // TODO: others + for { + switch b.Kind { + case BlockIf: + switch b.Control.Op { + case OpSETL: + b.Kind = BlockLT + b.Control = b.Control.Args[0] + continue + case OpSETNE: + b.Kind = BlockNE + b.Control = b.Control.Args[0] + continue + case OpSETB: + b.Kind = BlockULT + b.Control = b.Control.Args[0] + continue + case OpMOVBload: + b.Kind = BlockNE + b.Control = b.NewValue2(OpTESTB, TypeFlags, nil, b.Control, b.Control) + continue + // TODO: others + } + case BlockLT: + if b.Control.Op == OpInvertFlags { + b.Kind = BlockGT + b.Control = b.Control.Args[0] + continue + } + case BlockGT: + if b.Control.Op == OpInvertFlags { + b.Kind = BlockLT + b.Control = b.Control.Args[0] + continue + } + case BlockLE: + if b.Control.Op == OpInvertFlags { + b.Kind = BlockGE + b.Control = b.Control.Args[0] + continue + } + case BlockGE: + if b.Control.Op == OpInvertFlags { + b.Kind = BlockLE + b.Control = b.Control.Args[0] + continue + } + case BlockULT: + if b.Control.Op == OpInvertFlags { + b.Kind = BlockUGT + b.Control = b.Control.Args[0] + continue + } + case BlockUGT: + if b.Control.Op == OpInvertFlags { + b.Kind = BlockULT + b.Control = b.Control.Args[0] + continue + } + case BlockULE: + if b.Control.Op == OpInvertFlags { + b.Kind = BlockUGE + b.Control = b.Control.Args[0] + continue + } + case BlockUGE: + if b.Control.Op == OpInvertFlags { + b.Kind = BlockULE + b.Control = b.Control.Args[0] + continue + } + case BlockEQ: + if b.Control.Op == OpInvertFlags { + b.Control = b.Control.Args[0] + continue + } + case BlockNE: + if b.Control.Op == OpInvertFlags { + b.Control = b.Control.Args[0] + continue + } } - case BlockLT: - if b.Control.Op == OpInvertFlags { - b.Kind = BlockGE - b.Control = b.Control.Args[0] - } - case BlockULT: - if b.Control.Op == OpInvertFlags { - b.Kind = BlockUGE - b.Control = b.Control.Args[0] - } - case BlockEQ: - if b.Control.Op == OpInvertFlags { - b.Kind = BlockNE - b.Control = b.Control.Args[0] - } - case BlockNE: - if b.Control.Op == OpInvertFlags { - b.Kind = BlockEQ - b.Control = b.Control.Args[0] - } - // TODO: others + break } } } diff --git a/src/cmd/internal/ssa/lowerAmd64.go b/src/cmd/internal/ssa/lowerAmd64.go index ef891c37d9..356f646dcc 100644 --- a/src/cmd/internal/ssa/lowerAmd64.go +++ 
b/src/cmd/internal/ssa/lowerAmd64.go @@ -7,11 +7,11 @@ func lowerAmd64(v *Value) bool { case OpADDCQ: // match: (ADDCQ [c] (LEAQ8 [d] x y)) // cond: - // result: (LEAQ8 [c.(int64)+d.(int64)] x y) + // result: (LEAQ8 [addOff(c, d)] x y) { c := v.Aux if v.Args[0].Op != OpLEAQ8 { - goto end16348939e556e99e8447227ecb986f01 + goto end3bc1457811adc0cb81ad6b88a7461c60 } d := v.Args[0].Aux x := v.Args[0].Args[0] @@ -19,58 +19,40 @@ func lowerAmd64(v *Value) bool { v.Op = OpLEAQ8 v.Aux = nil v.resetArgs() - v.Aux = c.(int64) + d.(int64) + v.Aux = addOff(c, d) v.AddArg(x) v.AddArg(y) return true } - goto end16348939e556e99e8447227ecb986f01 - end16348939e556e99e8447227ecb986f01: + goto end3bc1457811adc0cb81ad6b88a7461c60 + end3bc1457811adc0cb81ad6b88a7461c60: ; - // match: (ADDCQ [off1] (FPAddr [off2])) - // cond: - // result: (FPAddr [off1.(int64)+off2.(int64)]) - { - off1 := v.Aux - if v.Args[0].Op != OpFPAddr { - goto end28e093ab0618066e6b2609db7aaf309b - } - off2 := v.Args[0].Aux - v.Op = OpFPAddr - v.Aux = nil - v.resetArgs() - v.Aux = off1.(int64) + off2.(int64) - return true - } - goto end28e093ab0618066e6b2609db7aaf309b - end28e093ab0618066e6b2609db7aaf309b: - ; - // match: (ADDCQ [off1] (SPAddr [off2])) - // cond: - // result: (SPAddr [off1.(int64)+off2.(int64)]) + // match: (ADDCQ [off] x) + // cond: off.(int64) == 0 + // result: (Copy x) { - off1 := v.Aux - if v.Args[0].Op != OpSPAddr { - goto endd0c27c62d150b88168075c5ba113d1fa + off := v.Aux + x := v.Args[0] + if !(off.(int64) == 0) { + goto end6710a6679c47b70577ecea7ad00dae87 } - off2 := v.Args[0].Aux - v.Op = OpSPAddr + v.Op = OpCopy v.Aux = nil v.resetArgs() - v.Aux = off1.(int64) + off2.(int64) + v.AddArg(x) return true } - goto endd0c27c62d150b88168075c5ba113d1fa - endd0c27c62d150b88168075c5ba113d1fa: + goto end6710a6679c47b70577ecea7ad00dae87 + end6710a6679c47b70577ecea7ad00dae87: ; case OpADDQ: - // match: (ADDQ x (Const [c])) + // match: (ADDQ x (MOVQconst [c])) // cond: // result: (ADDCQ [c] x) { x := v.Args[0] - if v.Args[1].Op != OpConst { - goto endef6908cfdf56e102cc327a3ddc14393d + if v.Args[1].Op != OpMOVQconst { + goto end39b79e84f20a6d44b5c4136aae220ac2 } c := v.Args[1].Aux v.Op = OpADDCQ @@ -80,15 +62,15 @@ func lowerAmd64(v *Value) bool { v.AddArg(x) return true } - goto endef6908cfdf56e102cc327a3ddc14393d - endef6908cfdf56e102cc327a3ddc14393d: + goto end39b79e84f20a6d44b5c4136aae220ac2 + end39b79e84f20a6d44b5c4136aae220ac2: ; - // match: (ADDQ (Const [c]) x) + // match: (ADDQ (MOVQconst [c]) x) // cond: // result: (ADDCQ [c] x) { - if v.Args[0].Op != OpConst { - goto endb54a32cf3147f424f08b46db62c69b23 + if v.Args[0].Op != OpMOVQconst { + goto endc05ff5a2a132241b69d00c852001d820 } c := v.Args[0].Aux x := v.Args[1] @@ -99,8 +81,8 @@ func lowerAmd64(v *Value) bool { v.AddArg(x) return true } - goto endb54a32cf3147f424f08b46db62c69b23 - endb54a32cf3147f424f08b46db62c69b23: + goto endc05ff5a2a132241b69d00c852001d820 + endc05ff5a2a132241b69d00c852001d820: ; // match: (ADDQ x (SHLCQ [shift] y)) // cond: shift.(int64) == 3 @@ -168,13 +150,13 @@ func lowerAmd64(v *Value) bool { end35a02a1587264e40cf1055856ff8445a: ; case OpCMPQ: - // match: (CMPQ x (Const [c])) + // match: (CMPQ x (MOVQconst [c])) // cond: // result: (CMPCQ x [c]) { x := v.Args[0] - if v.Args[1].Op != OpConst { - goto end1770a40e4253d9f669559a360514613e + if v.Args[1].Op != OpMOVQconst { + goto endf180bae15b3d24c0213520d7f7aa98b4 } c := v.Args[1].Aux v.Op = OpCMPCQ @@ -184,15 +166,15 @@ func lowerAmd64(v *Value) bool { v.Aux = c return true } - goto 
end1770a40e4253d9f669559a360514613e - end1770a40e4253d9f669559a360514613e: + goto endf180bae15b3d24c0213520d7f7aa98b4 + endf180bae15b3d24c0213520d7f7aa98b4: ; - // match: (CMPQ (Const [c]) x) + // match: (CMPQ (MOVQconst [c]) x) // cond: // result: (InvertFlags (CMPCQ x [c])) { - if v.Args[0].Op != OpConst { - goto enda4e64c7eaeda16c1c0db9dac409cd126 + if v.Args[0].Op != OpMOVQconst { + goto end8fc58bffa73b3df80b3de72c91844884 } c := v.Args[0].Aux x := v.Args[1] @@ -206,8 +188,42 @@ func lowerAmd64(v *Value) bool { v.AddArg(v0) return true } - goto enda4e64c7eaeda16c1c0db9dac409cd126 - enda4e64c7eaeda16c1c0db9dac409cd126: + goto end8fc58bffa73b3df80b3de72c91844884 + end8fc58bffa73b3df80b3de72c91844884: + ; + case OpConst: + // match: (Const [val]) + // cond: is64BitInt(t) + // result: (MOVQconst [val]) + { + t := v.Type + val := v.Aux + if !(is64BitInt(t)) { + goto end7f5c5b34093fbc6860524cb803ee51bf + } + v.Op = OpMOVQconst + v.Aux = nil + v.resetArgs() + v.Aux = val + return true + } + goto end7f5c5b34093fbc6860524cb803ee51bf + end7f5c5b34093fbc6860524cb803ee51bf: + ; + case OpGlobal: + // match: (Global [sym]) + // cond: + // result: (LEAQglobal [GlobalOffset{sym,0}]) + { + sym := v.Aux + v.Op = OpLEAQglobal + v.Aux = nil + v.resetArgs() + v.Aux = GlobalOffset{sym, 0} + return true + } + goto end3a3c76fac0e2e53c0e1c60b9524e6f1c + end3a3c76fac0e2e53c0e1c60b9524e6f1c: ; case OpIsInBounds: // match: (IsInBounds idx len) @@ -273,16 +289,16 @@ func lowerAmd64(v *Value) bool { ; case OpLoad: // match: (Load ptr mem) - // cond: (is64BitInt(t) || isPtr(t)) - // result: (MOVQload [int64(0)] ptr mem) + // cond: t.IsBoolean() + // result: (MOVBload [int64(0)] ptr mem) { t := v.Type ptr := v.Args[0] mem := v.Args[1] - if !(is64BitInt(t) || isPtr(t)) { - goto end581ce5a20901df1b8143448ba031685b + if !(t.IsBoolean()) { + goto end73f21632e56c3614902d3c29c82dc4ea } - v.Op = OpMOVQload + v.Op = OpMOVBload v.Aux = nil v.resetArgs() v.Aux = int64(0) @@ -290,77 +306,38 @@ func lowerAmd64(v *Value) bool { v.AddArg(mem) return true } - goto end581ce5a20901df1b8143448ba031685b - end581ce5a20901df1b8143448ba031685b: - ; - case OpMOVQload: - // match: (MOVQload [off1] (FPAddr [off2]) mem) - // cond: - // result: (MOVQloadFP [off1.(int64)+off2.(int64)] mem) - { - off1 := v.Aux - if v.Args[0].Op != OpFPAddr { - goto endce972b1aa84b56447978c43def87fa57 - } - off2 := v.Args[0].Aux - mem := v.Args[1] - v.Op = OpMOVQloadFP - v.Aux = nil - v.resetArgs() - v.Aux = off1.(int64) + off2.(int64) - v.AddArg(mem) - return true - } - goto endce972b1aa84b56447978c43def87fa57 - endce972b1aa84b56447978c43def87fa57: + goto end73f21632e56c3614902d3c29c82dc4ea + end73f21632e56c3614902d3c29c82dc4ea: ; - // match: (MOVQload [off1] (SPAddr [off2]) mem) - // cond: - // result: (MOVQloadSP [off1.(int64)+off2.(int64)] mem) + // match: (Load ptr mem) + // cond: (is64BitInt(t) || isPtr(t)) + // result: (MOVQload [int64(0)] ptr mem) { - off1 := v.Aux - if v.Args[0].Op != OpSPAddr { - goto end3d8628a6536350a123be81240b8a1376 - } - off2 := v.Args[0].Aux + t := v.Type + ptr := v.Args[0] mem := v.Args[1] - v.Op = OpMOVQloadSP - v.Aux = nil - v.resetArgs() - v.Aux = off1.(int64) + off2.(int64) - v.AddArg(mem) - return true - } - goto end3d8628a6536350a123be81240b8a1376 - end3d8628a6536350a123be81240b8a1376: - ; - // match: (MOVQload [off] (Global [sym]) mem) - // cond: - // result: (MOVQloadglobal [GlobalOffset{sym,off.(int64)}] mem) - { - off := v.Aux - if v.Args[0].Op != OpGlobal { - goto end20693899317f3f8d1b47fefa64087654 + if 
!(is64BitInt(t) || isPtr(t)) { + goto end581ce5a20901df1b8143448ba031685b } - sym := v.Args[0].Aux - mem := v.Args[1] - v.Op = OpMOVQloadglobal + v.Op = OpMOVQload v.Aux = nil v.resetArgs() - v.Aux = GlobalOffset{sym, off.(int64)} + v.Aux = int64(0) + v.AddArg(ptr) v.AddArg(mem) return true } - goto end20693899317f3f8d1b47fefa64087654 - end20693899317f3f8d1b47fefa64087654: + goto end581ce5a20901df1b8143448ba031685b + end581ce5a20901df1b8143448ba031685b: ; + case OpMOVQload: // match: (MOVQload [off1] (ADDCQ [off2] ptr) mem) // cond: - // result: (MOVQload [off1.(int64)+off2.(int64)] ptr mem) + // result: (MOVQload [addOff(off1, off2)] ptr mem) { off1 := v.Aux if v.Args[0].Op != OpADDCQ { - goto enda68a39292ba2a05b3436191cb0bb0516 + goto end218ceec16b8299d573d3c9ccaa69b086 } off2 := v.Args[0].Aux ptr := v.Args[0].Args[0] @@ -368,21 +345,21 @@ func lowerAmd64(v *Value) bool { v.Op = OpMOVQload v.Aux = nil v.resetArgs() - v.Aux = off1.(int64) + off2.(int64) + v.Aux = addOff(off1, off2) v.AddArg(ptr) v.AddArg(mem) return true } - goto enda68a39292ba2a05b3436191cb0bb0516 - enda68a39292ba2a05b3436191cb0bb0516: + goto end218ceec16b8299d573d3c9ccaa69b086 + end218ceec16b8299d573d3c9ccaa69b086: ; // match: (MOVQload [off1] (LEAQ8 [off2] ptr idx) mem) // cond: - // result: (MOVQloadidx8 [off1.(int64)+off2.(int64)] ptr idx mem) + // result: (MOVQloadidx8 [addOff(off1, off2)] ptr idx mem) { off1 := v.Aux if v.Args[0].Op != OpLEAQ8 { - goto endba0e5cee85021614041016b1a2709ab8 + goto end02f5ad148292c46463e7c20d3b821735 } off2 := v.Args[0].Aux ptr := v.Args[0].Args[0] @@ -391,131 +368,117 @@ func lowerAmd64(v *Value) bool { v.Op = OpMOVQloadidx8 v.Aux = nil v.resetArgs() - v.Aux = off1.(int64) + off2.(int64) + v.Aux = addOff(off1, off2) v.AddArg(ptr) v.AddArg(idx) v.AddArg(mem) return true } - goto endba0e5cee85021614041016b1a2709ab8 - endba0e5cee85021614041016b1a2709ab8: + goto end02f5ad148292c46463e7c20d3b821735 + end02f5ad148292c46463e7c20d3b821735: ; - case OpMOVQstore: - // match: (MOVQstore [off1] (FPAddr [off2]) val mem) + case OpMOVQloadidx8: + // match: (MOVQloadidx8 [off1] (ADDCQ [off2] ptr) idx mem) // cond: - // result: (MOVQstoreFP [off1.(int64)+off2.(int64)] val mem) + // result: (MOVQloadidx8 [addOff(off1, off2)] ptr idx mem) { off1 := v.Aux - if v.Args[0].Op != OpFPAddr { - goto end0a2a81a20558dfc93790aecb1e9cc81a + if v.Args[0].Op != OpADDCQ { + goto ende47e8d742e2615f39fb6509a5749e414 } off2 := v.Args[0].Aux - val := v.Args[1] + ptr := v.Args[0].Args[0] + idx := v.Args[1] mem := v.Args[2] - v.Op = OpMOVQstoreFP + v.Op = OpMOVQloadidx8 v.Aux = nil v.resetArgs() - v.Aux = off1.(int64) + off2.(int64) - v.AddArg(val) + v.Aux = addOff(off1, off2) + v.AddArg(ptr) + v.AddArg(idx) v.AddArg(mem) return true } - goto end0a2a81a20558dfc93790aecb1e9cc81a - end0a2a81a20558dfc93790aecb1e9cc81a: + goto ende47e8d742e2615f39fb6509a5749e414 + ende47e8d742e2615f39fb6509a5749e414: ; - // match: (MOVQstore [off1] (SPAddr [off2]) val mem) + case OpMOVQstore: + // match: (MOVQstore [off1] (ADDCQ [off2] ptr) val mem) // cond: - // result: (MOVQstoreSP [off1.(int64)+off2.(int64)] val mem) + // result: (MOVQstore [addOff(off1, off2)] ptr val mem) { off1 := v.Aux - if v.Args[0].Op != OpSPAddr { - goto end1cb5b7e766f018270fa434c6f46f607f + if v.Args[0].Op != OpADDCQ { + goto enddfd4c7a20fd3b84eb9dcf84b98c661fc } off2 := v.Args[0].Aux + ptr := v.Args[0].Args[0] val := v.Args[1] mem := v.Args[2] - v.Op = OpMOVQstoreSP - v.Aux = nil - v.resetArgs() - v.Aux = off1.(int64) + off2.(int64) - v.AddArg(val) - v.AddArg(mem) 
- return true - } - goto end1cb5b7e766f018270fa434c6f46f607f - end1cb5b7e766f018270fa434c6f46f607f: - ; - // match: (MOVQstore [off] (Global [sym]) val mem) - // cond: - // result: (MOVQstoreglobal [GlobalOffset{sym,off.(int64)}] val mem) - { - off := v.Aux - if v.Args[0].Op != OpGlobal { - goto end657d07e37c720a8fbb108a31bb48090d - } - sym := v.Args[0].Aux - val := v.Args[1] - mem := v.Args[2] - v.Op = OpMOVQstoreglobal + v.Op = OpMOVQstore v.Aux = nil v.resetArgs() - v.Aux = GlobalOffset{sym, off.(int64)} + v.Aux = addOff(off1, off2) + v.AddArg(ptr) v.AddArg(val) v.AddArg(mem) return true } - goto end657d07e37c720a8fbb108a31bb48090d - end657d07e37c720a8fbb108a31bb48090d: + goto enddfd4c7a20fd3b84eb9dcf84b98c661fc + enddfd4c7a20fd3b84eb9dcf84b98c661fc: ; - // match: (MOVQstore [off1] (ADDCQ [off2] ptr) val mem) + // match: (MOVQstore [off1] (LEAQ8 [off2] ptr idx) val mem) // cond: - // result: (MOVQstore [off1.(int64)+off2.(int64)] ptr val mem) + // result: (MOVQstoreidx8 [addOff(off1, off2)] ptr idx val mem) { off1 := v.Aux - if v.Args[0].Op != OpADDCQ { - goto end271e3052de832e22b1f07576af2854de + if v.Args[0].Op != OpLEAQ8 { + goto endce1db8c8d37c8397c500a2068a65c215 } off2 := v.Args[0].Aux ptr := v.Args[0].Args[0] + idx := v.Args[0].Args[1] val := v.Args[1] mem := v.Args[2] - v.Op = OpMOVQstore + v.Op = OpMOVQstoreidx8 v.Aux = nil v.resetArgs() - v.Aux = off1.(int64) + off2.(int64) + v.Aux = addOff(off1, off2) v.AddArg(ptr) + v.AddArg(idx) v.AddArg(val) v.AddArg(mem) return true } - goto end271e3052de832e22b1f07576af2854de - end271e3052de832e22b1f07576af2854de: + goto endce1db8c8d37c8397c500a2068a65c215 + endce1db8c8d37c8397c500a2068a65c215: ; - // match: (MOVQstore [off1] (LEAQ8 [off2] ptr idx) val mem) + case OpMOVQstoreidx8: + // match: (MOVQstoreidx8 [off1] (ADDCQ [off2] ptr) idx val mem) // cond: - // result: (MOVQstoreidx8 [off1.(int64)+off2.(int64)] ptr idx val mem) + // result: (MOVQstoreidx8 [addOff(off1, off2)] ptr idx val mem) { off1 := v.Aux - if v.Args[0].Op != OpLEAQ8 { - goto end4ad469f534c7369f6ac36bdace3462ad + if v.Args[0].Op != OpADDCQ { + goto endcdb222707a568ad468f7fff2fc42fc39 } off2 := v.Args[0].Aux ptr := v.Args[0].Args[0] - idx := v.Args[0].Args[1] - val := v.Args[1] - mem := v.Args[2] + idx := v.Args[1] + val := v.Args[2] + mem := v.Args[3] v.Op = OpMOVQstoreidx8 v.Aux = nil v.resetArgs() - v.Aux = off1.(int64) + off2.(int64) + v.Aux = addOff(off1, off2) v.AddArg(ptr) v.AddArg(idx) v.AddArg(val) v.AddArg(mem) return true } - goto end4ad469f534c7369f6ac36bdace3462ad - end4ad469f534c7369f6ac36bdace3462ad: + goto endcdb222707a568ad468f7fff2fc42fc39 + endcdb222707a568ad468f7fff2fc42fc39: ; case OpMULCQ: // match: (MULCQ [c] x) @@ -538,13 +501,13 @@ func lowerAmd64(v *Value) bool { end90a1c055d9658aecacce5e101c1848b4: ; case OpMULQ: - // match: (MULQ x (Const [c])) + // match: (MULQ x (MOVQconst [c])) // cond: // result: (MULCQ [c] x) { x := v.Args[0] - if v.Args[1].Op != OpConst { - goto endc427f4838d2e83c00cc097b20bd20a37 + if v.Args[1].Op != OpMOVQconst { + goto endce35d001482ea209e62e9394bd07c7cb } c := v.Args[1].Aux v.Op = OpMULCQ @@ -554,15 +517,15 @@ func lowerAmd64(v *Value) bool { v.AddArg(x) return true } - goto endc427f4838d2e83c00cc097b20bd20a37 - endc427f4838d2e83c00cc097b20bd20a37: + goto endce35d001482ea209e62e9394bd07c7cb + endce35d001482ea209e62e9394bd07c7cb: ; - // match: (MULQ (Const [c]) x) + // match: (MULQ (MOVQconst [c]) x) // cond: // result: (MULCQ [c] x) { - if v.Args[0].Op != OpConst { - goto endd70de938e71150d1c9e8173c2a5b2d95 + if 
v.Args[0].Op != OpMOVQconst { + goto end804f58b1f6a7cce19d48379999ec03f1 } c := v.Args[0].Aux x := v.Args[1] @@ -573,8 +536,32 @@ func lowerAmd64(v *Value) bool { v.AddArg(x) return true } - goto endd70de938e71150d1c9e8173c2a5b2d95 - endd70de938e71150d1c9e8173c2a5b2d95: + goto end804f58b1f6a7cce19d48379999ec03f1 + end804f58b1f6a7cce19d48379999ec03f1: + ; + case OpMove: + // match: (Move [size] dst src mem) + // cond: + // result: (REPMOVSB dst src (Const [size.(int64)]) mem) + { + size := v.Aux + dst := v.Args[0] + src := v.Args[1] + mem := v.Args[2] + v.Op = OpREPMOVSB + v.Aux = nil + v.resetArgs() + v.AddArg(dst) + v.AddArg(src) + v0 := v.Block.NewValue(OpConst, TypeInvalid, nil) + v0.Type = TypeUInt64 + v0.Aux = size.(int64) + v.AddArg(v0) + v.AddArg(mem) + return true + } + goto end48909259b265a6bb2a076bc2c2dc7d1f + end48909259b265a6bb2a076bc2c2dc7d1f: ; case OpMul: // match: (Mul x y) @@ -597,6 +584,23 @@ func lowerAmd64(v *Value) bool { goto endfab0d598f376ecba45a22587d50f7aff endfab0d598f376ecba45a22587d50f7aff: ; + case OpOffPtr: + // match: (OffPtr [off] ptr) + // cond: + // result: (ADDCQ [off] ptr) + { + off := v.Aux + ptr := v.Args[0] + v.Op = OpADDCQ + v.Aux = nil + v.resetArgs() + v.Aux = off + v.AddArg(ptr) + return true + } + goto endfe8f713b1d237a23311fb721ee46bedb + endfe8f713b1d237a23311fb721ee46bedb: + ; case OpSETL: // match: (SETL (InvertFlags x)) // cond: @@ -616,13 +620,13 @@ func lowerAmd64(v *Value) bool { end456c7681d48305698c1ef462d244bdc6: ; case OpSUBQ: - // match: (SUBQ x (Const [c])) + // match: (SUBQ x (MOVQconst [c])) // cond: // result: (SUBCQ x [c]) { x := v.Args[0] - if v.Args[1].Op != OpConst { - goto endb31e242f283867de4722665a5796008c + if v.Args[1].Op != OpMOVQconst { + goto endc96cd1cb2dd98427c34fb9543feca4fe } c := v.Args[1].Aux v.Op = OpSUBCQ @@ -632,16 +636,16 @@ func lowerAmd64(v *Value) bool { v.Aux = c return true } - goto endb31e242f283867de4722665a5796008c - endb31e242f283867de4722665a5796008c: + goto endc96cd1cb2dd98427c34fb9543feca4fe + endc96cd1cb2dd98427c34fb9543feca4fe: ; - // match: (SUBQ (Const [c]) x) + // match: (SUBQ (MOVQconst [c]) x) // cond: // result: (NEGQ (SUBCQ x [c])) { t := v.Type - if v.Args[0].Op != OpConst { - goto end569cc755877d1f89a701378bec05c08d + if v.Args[0].Op != OpMOVQconst { + goto end900aaaf28cefac6bb62e76b5151611cf } c := v.Args[0].Aux x := v.Args[1] @@ -655,8 +659,8 @@ func lowerAmd64(v *Value) bool { v.AddArg(v0) return true } - goto end569cc755877d1f89a701378bec05c08d - end569cc755877d1f89a701378bec05c08d: + goto end900aaaf28cefac6bb62e76b5151611cf + end900aaaf28cefac6bb62e76b5151611cf: ; case OpStore: // match: (Store ptr val mem) diff --git a/src/cmd/internal/ssa/op.go b/src/cmd/internal/ssa/op.go index ebe4a8e747..e0dc531fc9 100644 --- a/src/cmd/internal/ssa/op.go +++ b/src/cmd/internal/ssa/op.go @@ -4,6 +4,8 @@ package ssa +import "fmt" + // An Op encodes the specific operation that a Value performs. // Opcodes' semantics can be modified by the type and aux fields of the Value. // For instance, OpAdd can be 32 or 64 bit, signed or unsigned, float or complex, depending on Value.Type. @@ -47,8 +49,11 @@ const ( OpArg // address of a function parameter/result. Memory input is an arg called ".mem". aux is a string (TODO: make it something other than a string?) OpGlobal // the address of a global variable aux.(*gc.Sym) OpFunc // entry address of a function + OpFP // frame pointer + OpSP // stack pointer OpCopy // output = arg0 + OpMove // arg0=destptr, arg1=srcptr, arg2=mem, aux.(int64)=size. 
Returns memory. OpPhi // select an argument based on which predecessor block we came from OpSliceMake // arg0=ptr, arg1=len, arg2=cap @@ -62,7 +67,8 @@ const ( OpLoad // Load from arg0+aux.(int64). arg1=memory OpStore // Store arg1 to arg0+aux.(int64). arg2=memory. Returns memory. - OpSliceIndex // arg0=slice, arg1=index, arg2=memory + OpArrayIndex // arg0=array, arg1=index. Returns a[i] + OpPtrIndex // arg0=ptr, arg1=index. Computes ptr+sizeof(*v.type)*index, where index is extended to ptrwidth type OpIsNonNil // arg0 != nil OpIsInBounds // 0 <= arg0 < arg1 @@ -75,6 +81,8 @@ const ( OpConvert // convert arg0 to another type OpConvNop // interpret arg0 as another type + OpOffPtr // arg0 + aux.(int64) (arg0 and result are pointers) + // These ops return a pointer to a location on the stack. OpFPAddr // FP + aux.(int64) (+ == args from caller, - == locals) OpSPAddr // SP + aux.(int64) @@ -96,6 +104,15 @@ type GlobalOffset struct { Offset int64 } +// offset adds x to the location specified by g and returns it. +func (g GlobalOffset) offset(x int64) GlobalOffset { + return GlobalOffset{g.Global, g.Offset + x} +} + +func (g GlobalOffset) String() string { + return fmt.Sprintf("%v+%d", g.Global, g.Offset) +} + //go:generate stringer -type=Op type opInfo struct { diff --git a/src/cmd/internal/ssa/op_string.go b/src/cmd/internal/ssa/op_string.go index 0851cfe0fb..9b22f664ef 100644 --- a/src/cmd/internal/ssa/op_string.go +++ b/src/cmd/internal/ssa/op_string.go @@ -6,16 +6,16 @@ import "fmt" const ( _Op_name_0 = "opInvalid" - _Op_name_1 = "opGenericBaseOpAddOpSubOpMulOpLessOpConstOpArgOpGlobalOpFuncOpCopyOpPhiOpSliceMakeOpSlicePtrOpSliceLenOpSliceCapOpStringMakeOpStringPtrOpStringLenOpLoadOpStoreOpSliceIndexOpIsNonNilOpIsInBoundsOpCallOpStaticCallOpConvertOpConvNopOpFPAddrOpSPAddrOpStoreReg8OpLoadReg8OpFwdRef" - _Op_name_2 = "opAMD64BaseOpADDQOpSUBQOpADDCQOpSUBCQOpMULQOpMULCQOpSHLQOpSHLCQOpNEGQOpADDLOpCMPQOpCMPCQOpTESTQOpSETEQOpSETNEOpSETLOpSETGEOpSETBOpInvertFlagsOpLEAQOpLEAQ2OpLEAQ4OpLEAQ8OpMOVQloadOpMOVQstoreOpMOVQloadidx8OpMOVQstoreidx8OpMOVQloadglobalOpMOVQstoreglobalOpMOVQloadFPOpMOVQloadSPOpMOVQstoreFPOpMOVQstoreSPOpMOVQconst" + _Op_name_1 = "opGenericBaseOpAddOpSubOpMulOpLessOpConstOpArgOpGlobalOpFuncOpFPOpSPOpCopyOpMoveOpPhiOpSliceMakeOpSlicePtrOpSliceLenOpSliceCapOpStringMakeOpStringPtrOpStringLenOpLoadOpStoreOpArrayIndexOpPtrIndexOpIsNonNilOpIsInBoundsOpCallOpStaticCallOpConvertOpConvNopOpOffPtrOpFPAddrOpSPAddrOpStoreReg8OpLoadReg8OpFwdRef" + _Op_name_2 = "opAMD64BaseOpADDQOpSUBQOpADDCQOpSUBCQOpMULQOpMULCQOpSHLQOpSHLCQOpNEGQOpADDLOpCMPQOpCMPCQOpTESTQOpTESTBOpSETEQOpSETNEOpSETLOpSETGEOpSETBOpInvertFlagsOpLEAQOpLEAQ2OpLEAQ4OpLEAQ8OpLEAQglobalOpMOVBloadOpMOVBQZXloadOpMOVBQSXloadOpMOVQloadOpMOVQstoreOpMOVQloadidx8OpMOVQstoreidx8OpMOVQloadglobalOpMOVQstoreglobalOpMOVQconstOpREPMOVSB" _Op_name_3 = "op386Base" _Op_name_4 = "opMax" ) var ( _Op_index_0 = [...]uint8{0, 9} - _Op_index_1 = [...]uint16{0, 13, 18, 23, 28, 34, 41, 46, 54, 60, 66, 71, 82, 92, 102, 112, 124, 135, 146, 152, 159, 171, 181, 193, 199, 211, 220, 229, 237, 245, 256, 266, 274} - _Op_index_2 = [...]uint16{0, 11, 17, 23, 30, 37, 43, 50, 56, 63, 69, 75, 81, 88, 95, 102, 109, 115, 122, 128, 141, 147, 154, 161, 168, 178, 189, 203, 218, 234, 251, 263, 275, 288, 301, 312} + _Op_index_1 = [...]uint16{0, 13, 18, 23, 28, 34, 41, 46, 54, 60, 64, 68, 74, 80, 85, 96, 106, 116, 126, 138, 149, 160, 166, 173, 185, 195, 205, 217, 223, 235, 244, 253, 261, 269, 277, 288, 298, 306} + _Op_index_2 = [...]uint16{0, 11, 17, 23, 30, 37, 43, 50, 56, 63, 
69, 75, 81, 88, 95, 102, 109, 116, 122, 129, 135, 148, 154, 161, 168, 175, 187, 197, 210, 223, 233, 244, 258, 273, 289, 306, 317, 327} _Op_index_3 = [...]uint8{0, 9} _Op_index_4 = [...]uint8{0, 5} ) @@ -24,10 +24,10 @@ func (i Op) String() string { switch { case i == 0: return _Op_name_0 - case 1001 <= i && i <= 1032: + case 1001 <= i && i <= 1037: i -= 1001 return _Op_name_1[_Op_index_1[i]:_Op_index_1[i+1]] - case 2001 <= i && i <= 2035: + case 2001 <= i && i <= 2037: i -= 2001 return _Op_name_2[_Op_index_2[i]:_Op_index_2[i+1]] case i == 3001: diff --git a/src/cmd/internal/ssa/opamd64.go b/src/cmd/internal/ssa/opamd64.go index 8bdd19f713..46f0a69dfb 100644 --- a/src/cmd/internal/ssa/opamd64.go +++ b/src/cmd/internal/ssa/opamd64.go @@ -30,6 +30,7 @@ const ( OpCMPQ // arg0 compare to arg1 OpCMPCQ // arg0 compare to aux.(int64) OpTESTQ // (arg0 & arg1) compare to 0 + OpTESTB // (arg0 & arg1) compare to 0 // These opcodes extract a particular boolean condition from a flags value. OpSETEQ // extract == condition from arg0 @@ -43,29 +44,30 @@ const ( // This is a pseudo-op which can't appear in assembly output. OpInvertFlags // reverse direction of arg0 - OpLEAQ // arg0 + arg1 + aux.(int64) - OpLEAQ2 // arg0 + 2*arg1 + aux.(int64) - OpLEAQ4 // arg0 + 4*arg1 + aux.(int64) - OpLEAQ8 // arg0 + 8*arg1 + aux.(int64) + OpLEAQ // arg0 + arg1 + aux.(int64) + OpLEAQ2 // arg0 + 2*arg1 + aux.(int64) + OpLEAQ4 // arg0 + 4*arg1 + aux.(int64) + OpLEAQ8 // arg0 + 8*arg1 + aux.(int64) + OpLEAQglobal // no args. address of aux.(GlobalOffset) // Load/store from general address - OpMOVQload // Load from arg0+aux.(int64). arg1=memory + OpMOVBload // Load from arg0+aux.(int64). arg1=memory + OpMOVBQZXload + OpMOVBQSXload + OpMOVQload OpMOVQstore // Store arg1 to arg0+aux.(int64). arg2=memory, returns memory. OpMOVQloadidx8 // Load from arg0+arg1*8+aux.(int64). arg2=memory OpMOVQstoreidx8 // Store arg2 to arg0+arg1*8+aux.(int64). arg3=memory, returns memory. - // Load/store from global. aux.(GlobalOffset) encodes the global location. + // Load/store from global. Same as the above loads, but arg0 is missing and aux is a GlobalOffset instead of an int64. OpMOVQloadglobal // arg0 = memory OpMOVQstoreglobal // store arg0. arg1=memory, returns memory. - // Load/store from stack slot. - OpMOVQloadFP // load from FP+aux.(int64). arg0=memory - OpMOVQloadSP // load from SP+aux.(int64). arg0=memory - OpMOVQstoreFP // store arg0 to FP+aux.(int64). arg1=memory, returns memory. - OpMOVQstoreSP // store arg0 to SP+aux.(int64). arg1=memory, returns memory. 
-
-	// materialize a constant into a register
 	OpMOVQconst // (takes no arguments)
+
+	// move memory
+	OpREPMOVSB // arg0=destptr, arg1=srcptr, arg2=len, arg3=mem
 )
 
 type regMask uint64
@@ -89,13 +91,16 @@ var regsAMD64 = [...]string{
 	"R15",
 
 	// pseudo registers
+	"FP",
 	"FLAGS",
 	"OVERWRITE0", // the same register as the first input
 }
 
-var gp regMask = 0xef // all integer registers except SP
-var cx regMask = 0x2
-var flags regMask = 1 << 16
+var gp regMask = 0x1ffff // all integer registers (including SP&FP)
+var cx regMask = 1 << 1
+var si regMask = 1 << 6
+var di regMask = 1 << 7
+var flags regMask = 1 << 17
 
 var (
 	// gp = general purpose (integer) registers
@@ -129,13 +134,16 @@ var amd64Table = map[Op]opInfo{
 	OpCMPQ:  {asm: "CMPQ\t%I0,%I1", reg: gp2_flags}, // compute arg[0]-arg[1] and produce flags
 	OpCMPCQ: {asm: "CMPQ\t$%A,%I0", reg: gp1_flags},
 	OpTESTQ: {asm: "TESTQ\t%I0,%I1", reg: gp2_flags},
+	OpTESTB: {asm: "TESTB\t%I0,%I1", reg: gp2_flags},
 
-	OpLEAQ:  {flags: OpFlagCommutative, asm: "LEAQ\t%A(%I0)(%I1*1),%O0", reg: gp21}, // aux = int64 constant to add
-	OpLEAQ2: {asm: "LEAQ\t%A(%I0)(%I1*2),%O0"},
-	OpLEAQ4: {asm: "LEAQ\t%A(%I0)(%I1*4),%O0"},
-	OpLEAQ8: {asm: "LEAQ\t%A(%I0)(%I1*8),%O0"},
+	OpLEAQ:       {flags: OpFlagCommutative, asm: "LEAQ\t%A(%I0)(%I1*1),%O0", reg: gp21}, // aux = int64 constant to add
+	OpLEAQ2:      {asm: "LEAQ\t%A(%I0)(%I1*2),%O0"},
+	OpLEAQ4:      {asm: "LEAQ\t%A(%I0)(%I1*4),%O0"},
+	OpLEAQ8:      {asm: "LEAQ\t%A(%I0)(%I1*8),%O0"},
+	OpLEAQglobal: {asm: "LEAQ\t%A(SB),%O0", reg: gp01},
 
 	// loads and stores
+	OpMOVBload:      {asm: "MOVB\t%A(%I0),%O0", reg: gpload},
 	OpMOVQload:      {asm: "MOVQ\t%A(%I0),%O0", reg: gpload},
 	OpMOVQstore:     {asm: "MOVQ\t%I1,%A(%I0)", reg: gpstore},
 	OpMOVQloadidx8:  {asm: "MOVQ\t%A(%I0)(%I1*8),%O0", reg: gploadidx},
@@ -145,23 +153,20 @@ var amd64Table = map[Op]opInfo{
 
 	OpStaticCall: {asm: "CALL\t%A(SB)"},
 
-	OpCopy: {asm: "MOVQ\t%I0,%O0", reg: gp11},
+	OpCopy:    {asm: "MOVQ\t%I0,%O0", reg: gp11}, // TODO: make arch-specific
+	OpConvNop: {asm: "MOVQ\t%I0,%O0", reg: gp11}, // TODO: make arch-specific. Or get rid of this altogether.
 
 	// convert from flags back to boolean
 	OpSETL: {},
 
-	// ops for load/store to stack
-	OpMOVQloadFP:  {asm: "MOVQ\t%A(FP),%O0", reg: gpload_stack},  // mem -> value
-	OpMOVQloadSP:  {asm: "MOVQ\t%A(SP),%O0", reg: gpload_stack},  // mem -> value
-	OpMOVQstoreFP: {asm: "MOVQ\t%I0,%A(FP)", reg: gpstore_stack}, // mem, value -> mem
-	OpMOVQstoreSP: {asm: "MOVQ\t%I0,%A(SP)", reg: gpstore_stack}, // mem, value -> mem
-
 	// ops for spilling of registers
 	// unlike regular loads & stores, these take no memory argument.
 	// They are just like OpCopy but we use them during register allocation.
 	// TODO: different widths, float
 	OpLoadReg8:  {asm: "MOVQ\t%I0,%O0"},
 	OpStoreReg8: {asm: "MOVQ\t%I0,%O0"},
+
+	OpREPMOVSB: {asm: "REP MOVSB", reg: [2][]regMask{{di, si, cx, 0}, {0}}}, // TODO: record that si/di/cx are clobbered
 }
 
 func init() {
diff --git a/src/cmd/internal/ssa/regalloc.go b/src/cmd/internal/ssa/regalloc.go
index e2de10896e..c798d2e936 100644
--- a/src/cmd/internal/ssa/regalloc.go
+++ b/src/cmd/internal/ssa/regalloc.go
@@ -39,8 +39,9 @@ var registers = [...]Register{
 	// TODO X0, ...
 
 	// TODO: make arch-dependent
-	Register{16, "FLAGS"},
-	Register{17, "OVERWRITE"},
+	Register{16, "FP"}, // pseudo-register, actually a constant offset from SP
+	Register{17, "FLAGS"},
+	Register{18, "OVERWRITE"},
 }
 
 // countRegs returns the number of set bits in the register mask.
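For reference on the register-mask encoding touched above: each bit in a regMask names the register with the same index in regsAMD64, so appending FP as index 16 is what widens the general-purpose mask to 0x1ffff and pushes FLAGS up to bit 17, and it is also why the allocator below treats 0x10010 (SP at bit 4, FP at bit 16) as unspillable. A minimal standalone Go sketch of that arithmetic (illustrative only, not part of this patch; the main function and local names are made up):

    package main

    import "fmt"

    type regMask uint64

    func main() {
    	// Indices follow regsAMD64: AX=0, CX=1, DX=2, BX=3, SP=4, BP=5, SI=6, DI=7, ..., R15=15, FP=16, FLAGS=17.
    	gp := regMask(1)<<17 - 1                  // 0x1ffff: all 17 integer registers, SP and FP included
    	cx := regMask(1) << 1                     // 0x2
    	si := regMask(1) << 6                     // 0x40
    	di := regMask(1) << 7                     // 0x80
    	flags := regMask(1) << 17                 // 0x20000
    	nospill := regMask(1)<<4 | regMask(1)<<16 // 0x10010: SP and FP, the same mask regalloc refuses to spill
    	fmt.Printf("gp=%#x cx=%#x si=%#x di=%#x flags=%#x nospill=%#x\n", gp, cx, si, di, flags, nospill)
    }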
@@ -84,6 +85,19 @@ func regalloc(f *Func) {
 
 	var oldSched []*Value
 
+	// Hack to find fp, sp Values and assign them a register. (TODO: make not so hacky)
+	var fp, sp *Value
+	for _, v := range f.Entry.Values {
+		switch v.Op {
+		case OpSP:
+			sp = v
+			home = setloc(home, v, &registers[4]) // TODO: arch-dependent
+		case OpFP:
+			fp = v
+			home = setloc(home, v, &registers[16]) // TODO: arch-dependent
+		}
+	}
+
 	// Register allocate each block separately. All live values will live
 	// in home locations (stack slots) between blocks.
 	for _, b := range f.Blocks {
@@ -115,6 +129,10 @@ func regalloc(f *Func) {
 		}
 		regs := make([]regInfo, numRegs)
 
+		// TODO: hack: initialize fixed registers
+		regs[4] = regInfo{sp, sp, false}
+		regs[16] = regInfo{fp, fp, false}
+
 		var used regMask  // has a 1 for each non-nil entry in regs
 		var dirty regMask // has a 1 for each dirty entry in regs
 
@@ -133,9 +151,6 @@ func regalloc(f *Func) {
 			//  - definition of v. c will be identical to v but will live in
 			//    a register. v will be modified into a spill of c.
 			regspec := opcodeTable[v.Op].reg
-			if v.Op == OpConvNop {
-				regspec = opcodeTable[v.Args[0].Op].reg
-			}
 			inputs := regspec[0]
 			outputs := regspec[1]
 			if len(inputs) == 0 && len(outputs) == 0 {
@@ -154,6 +169,7 @@ func regalloc(f *Func) {
 			// nospill contains registers that we can't spill because
 			// we already set them up for use by the current instruction.
 			var nospill regMask
+			nospill |= 0x10010 // SP and FP can't be spilled (TODO: arch-specific)
 
 			// Move inputs into registers
 			for _, o := range order {
@@ -215,10 +231,16 @@ func regalloc(f *Func) {
 
 				// Load w into this register
 				var c *Value
-				if w.Op == OpConst {
+				if len(w.Args) == 0 {
 					// Materialize w
-					// TODO: arch-specific MOV op
-					c = b.NewValue(OpMOVQconst, w.Type, w.Aux)
+					if w.Op == OpFP || w.Op == OpSP || w.Op == OpGlobal {
+						c = b.NewValue1(OpCopy, w.Type, nil, w)
+					} else {
+						c = b.NewValue(w.Op, w.Type, w.Aux)
+					}
+				} else if len(w.Args) == 1 && (w.Args[0].Op == OpFP || w.Args[0].Op == OpSP || w.Args[0].Op == OpGlobal) {
+					// Materialize offsets from SP/FP/Global
+					c = b.NewValue1(w.Op, w.Type, w.Aux, w.Args[0])
 				} else if wreg != 0 {
 					// Copy from another register.
 					// Typically just an optimization, but this is
@@ -317,6 +339,10 @@ func regalloc(f *Func) {
 			v := regs[r].v
 			c := regs[r].c
 			if lastUse[v.ID] <= len(oldSched) {
+				if v == v.Block.Control {
+					// link control value to register version
+					v.Block.Control = c
+				}
 				continue // not live after block
 			}
 
@@ -334,6 +360,7 @@ func regalloc(f *Func) {
 		}
 	}
 	f.RegAlloc = home
+	deadcode(f) // remove values that had all of their uses rematerialized. TODO: separate pass?
 }
 
 // addPhiCopies adds copies of phi inputs in the blocks
diff --git a/src/cmd/internal/ssa/rewrite.go b/src/cmd/internal/ssa/rewrite.go
index 855719a877..75e910d690 100644
--- a/src/cmd/internal/ssa/rewrite.go
+++ b/src/cmd/internal/ssa/rewrite.go
@@ -4,14 +4,14 @@
 
 package ssa
 
-import "fmt"
+import "log"
 
 func applyRewrite(f *Func, r func(*Value) bool) {
 	// repeat rewrites until we find no more rewrites
 	var curv *Value
 	defer func() {
 		if curv != nil {
-			fmt.Printf("panic during rewrite of %s\n", curv.LongString())
+			log.Printf("panic during rewrite of %s\n", curv.LongString())
 			// TODO(khr): print source location also
 		}
 	}()
@@ -19,6 +19,18 @@ func applyRewrite(f *Func, r func(*Value) bool) {
 		change := false
 		for _, b := range f.Blocks {
 			for _, v := range b.Values {
+				// elide any copies generated during rewriting
+				for i, a := range v.Args {
+					if a.Op != OpCopy {
+						continue
+					}
+					for a.Op == OpCopy {
+						a = a.Args[0]
+					}
+					v.Args[i] = a
+				}
+
+				// apply rewrite function
 				curv = v
 				if r(v) {
 					change = true
@@ -26,6 +38,7 @@ func applyRewrite(f *Func, r func(*Value) bool) {
 			}
 		}
 		if !change {
+			curv = nil
 			return
 		}
 	}
@@ -52,3 +65,19 @@ func isSigned(t Type) bool {
 func typeSize(t Type) int64 {
 	return t.Size()
 }
+
+// addOff adds two offset aux values. Each should be an int64. Fails if wraparound happens.
+func addOff(a, b interface{}) interface{} {
+	x := a.(int64)
+	y := b.(int64)
+	z := x + y
+	// x and y have same sign and z has a different sign => overflow
+	if x^y >= 0 && x^z < 0 {
+		log.Panicf("offset overflow %d %d\n", x, y)
+	}
+	return z
+}
+
+func inBounds(idx, len int64) bool {
+	return idx >= 0 && idx < len
+}
diff --git a/src/cmd/internal/ssa/rulegen/generic.rules b/src/cmd/internal/ssa/rulegen/generic.rules
index d17449930f..c49d9d9f2e 100644
--- a/src/cmd/internal/ssa/rulegen/generic.rules
+++ b/src/cmd/internal/ssa/rulegen/generic.rules
@@ -3,17 +3,22 @@
 // license that can be found in the LICENSE file.
 
 // constant folding
-(Add (Const [c]) (Const [d])) && is64BitInt(t) && isSigned(t) -> (Const [{c.(int64)+d.(int64)}])
-(Add (Const [c]) (Const [d])) && is64BitInt(t) && !isSigned(t) -> (Const [{c.(uint64)+d.(uint64)}])
+(Add (Const [c]) (Const [d])) && is64BitInt(t) -> (Const [{c.(int64)+d.(int64)}])
+(Mul (Const [c]) (Const [d])) && is64BitInt(t) -> (Const [{c.(int64)*d.(int64)}])
+(IsInBounds (Const [c]) (Const [d])) -> (Const [inBounds(c.(int64),d.(int64))])
 
 // tear apart slices
 // TODO: anything that generates a slice needs to go in here.
 (SlicePtr (Load ptr mem)) -> (Load ptr mem)
-(SliceLen (Load ptr mem)) -> (Load (Add ptr (Const [int64(v.Block.Func.Config.ptrSize)])) mem)
-(SliceCap (Load ptr mem)) -> (Load (Add ptr (Const [int64(v.Block.Func.Config.ptrSize*2)])) mem)
-
-// expand array indexing
-// others? Depends on what is already done by frontend
+(SliceLen (Load ptr mem)) -> (Load (Add ptr (Const [int64(v.Block.Func.Config.ptrSize)])) mem)
+(SliceCap (Load ptr mem)) -> (Load (Add ptr (Const [int64(v.Block.Func.Config.ptrSize*2)])) mem)
+
+// indexing operations
 // Note: bounds check has already been done
-(SliceIndex s i mem) -> (Load (Add (SlicePtr s) (Mul i (Const [s.Type.Elem().Size()]))) mem)
+(ArrayIndex (Load ptr mem) idx) -> (Load (PtrIndex ptr idx) mem)
+(PtrIndex ptr idx) -> (Add ptr (Mul idx (Const [t.Elem().Size()])))
+// TODO: hopefully this will get rid of all full-width array copies.
+
+// big-object moves
+// TODO: fix size
+(Store dst (Load src mem) mem) && t.Size() > 8 -> (Move [t.Size()] dst src mem)
diff --git a/src/cmd/internal/ssa/rulegen/lower_amd64.rules b/src/cmd/internal/ssa/rulegen/lower_amd64.rules
index 55267d6842..0fed21e740 100644
--- a/src/cmd/internal/ssa/rulegen/lower_amd64.rules
+++ b/src/cmd/internal/ssa/rulegen/lower_amd64.rules
@@ -30,6 +30,7 @@
 (Less x y) && is64BitInt(v.Args[0].Type) && isSigned(v.Args[0].Type) -> (SETL (CMPQ x y))
 
+(Load ptr mem) && t.IsBoolean() -> (MOVBload [int64(0)] ptr mem)
 (Load ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVQload [int64(0)] ptr mem)
 (Store ptr val mem) && (is64BitInt(val.Type) || isPtr(val.Type)) -> (MOVQstore [int64(0)] ptr val mem)
 
@@ -37,28 +38,27 @@
 (IsNonNil p) -> (SETNE (TESTQ p p))
 (IsInBounds idx len) -> (SETB (CMPQ idx len))
 
+(Move [size] dst src mem) -> (REPMOVSB dst src (Const [size.(int64)]) mem)
+
+(OffPtr [off] ptr) -> (ADDCQ [off] ptr)
+
+(Const [val]) && is64BitInt(t) -> (MOVQconst [val])
+
 // Rules below here apply some simple optimizations after lowering.
 // TODO: Should this be a separate pass?
 
-// stack loads/stores
-(MOVQload [off1] (FPAddr [off2]) mem) -> (MOVQloadFP [off1.(int64)+off2.(int64)] mem)
-(MOVQload [off1] (SPAddr [off2]) mem) -> (MOVQloadSP [off1.(int64)+off2.(int64)] mem)
-(MOVQstore [off1] (FPAddr [off2]) val mem) -> (MOVQstoreFP [off1.(int64)+off2.(int64)] val mem)
-(MOVQstore [off1] (SPAddr [off2]) val mem) -> (MOVQstoreSP [off1.(int64)+off2.(int64)] val mem)
-
 // global loads/stores
-(MOVQload [off] (Global [sym]) mem) -> (MOVQloadglobal [GlobalOffset{sym,off.(int64)}] mem)
-(MOVQstore [off] (Global [sym]) val mem) -> (MOVQstoreglobal [GlobalOffset{sym,off.(int64)}] val mem)
+(Global [sym]) -> (LEAQglobal [GlobalOffset{sym,0}])
 
 // fold constants into instructions
-(ADDQ x (Const [c])) -> (ADDCQ [c] x) // TODO: restrict c to int32 range?
-(ADDQ (Const [c]) x) -> (ADDCQ [c] x)
-(SUBQ x (Const [c])) -> (SUBCQ x [c])
-(SUBQ (Const [c]) x) -> (NEGQ (SUBCQ x [c]))
-(MULQ x (Const [c])) -> (MULCQ [c] x)
-(MULQ (Const [c]) x) -> (MULCQ [c] x)
-(CMPQ x (Const [c])) -> (CMPCQ x [c])
-(CMPQ (Const [c]) x) -> (InvertFlags (CMPCQ x [c]))
+(ADDQ x (MOVQconst [c])) -> (ADDCQ [c] x) // TODO: restrict c to int32 range?
+(ADDQ (MOVQconst [c]) x) -> (ADDCQ [c] x)
+(SUBQ x (MOVQconst [c])) -> (SUBCQ x [c])
+(SUBQ (MOVQconst [c]) x) -> (NEGQ (SUBCQ x [c]))
+(MULQ x (MOVQconst [c])) -> (MULCQ [c] x)
+(MULQ (MOVQconst [c]) x) -> (MULCQ [c] x)
+(CMPQ x (MOVQconst [c])) -> (CMPCQ x [c])
+(CMPQ (MOVQconst [c]) x) -> (InvertFlags (CMPCQ x [c]))
 
 // strength reduction
 // TODO: do this a lot more generically
@@ -66,7 +66,7 @@
 
 // fold add/shift into leaq
 (ADDQ x (SHLCQ [shift] y)) && shift.(int64) == 3 -> (LEAQ8 [int64(0)] x y)
-(ADDCQ [c] (LEAQ8 [d] x y)) -> (LEAQ8 [c.(int64)+d.(int64)] x y)
+(ADDCQ [c] (LEAQ8 [d] x y)) -> (LEAQ8 [addOff(c, d)] x y)
 
 // reverse ordering of compare instruction
 (SETL (InvertFlags x)) -> (SETGE x)
@@ -76,13 +76,14 @@
 // the ADDCQ get eliminated, we still have to compute the ADDCQ and we now
 // have potentially two live values (ptr and (ADDCQ [off] ptr)) instead of one.
 // Nevertheless, let's do it!
-(MOVQload [off1] (ADDCQ [off2] ptr) mem) -> (MOVQload [off1.(int64)+off2.(int64)] ptr mem)
-(MOVQstore [off1] (ADDCQ [off2] ptr) val mem) -> (MOVQstore [off1.(int64)+off2.(int64)] ptr val mem)
+(MOVQload [off1] (ADDCQ [off2] ptr) mem) -> (MOVQload [addOff(off1, off2)] ptr mem)
+(MOVQstore [off1] (ADDCQ [off2] ptr) val mem) -> (MOVQstore [addOff(off1, off2)] ptr val mem)
 
 // indexed loads and stores
-(MOVQload [off1] (LEAQ8 [off2] ptr idx) mem) -> (MOVQloadidx8 [off1.(int64)+off2.(int64)] ptr idx mem)
-(MOVQstore [off1] (LEAQ8 [off2] ptr idx) val mem) -> (MOVQstoreidx8 [off1.(int64)+off2.(int64)] ptr idx val mem)
+(MOVQload [off1] (LEAQ8 [off2] ptr idx) mem) -> (MOVQloadidx8 [addOff(off1, off2)] ptr idx mem)
+(MOVQstore [off1] (LEAQ8 [off2] ptr idx) val mem) -> (MOVQstoreidx8 [addOff(off1, off2)] ptr idx val mem)
+
+(MOVQloadidx8 [off1] (ADDCQ [off2] ptr) idx mem) -> (MOVQloadidx8 [addOff(off1, off2)] ptr idx mem)
+(MOVQstoreidx8 [off1] (ADDCQ [off2] ptr) idx val mem) -> (MOVQstoreidx8 [addOff(off1, off2)] ptr idx val mem)
 
-// Combine the offset of a stack object with the offset within a stack object
-(ADDCQ [off1] (FPAddr [off2])) -> (FPAddr [off1.(int64)+off2.(int64)])
-(ADDCQ [off1] (SPAddr [off2])) -> (SPAddr [off1.(int64)+off2.(int64)])
+(ADDCQ [off] x) && off.(int64) == 0 -> (Copy x)
diff --git a/src/cmd/internal/ssa/rulegen/rulegen.go b/src/cmd/internal/ssa/rulegen/rulegen.go
index 31f46f7cce..4ac930298b 100644
--- a/src/cmd/internal/ssa/rulegen/rulegen.go
+++ b/src/cmd/internal/ssa/rulegen/rulegen.go
@@ -245,6 +245,12 @@ func genResult(w io.Writer, result string) {
 
 func genResult0(w io.Writer, result string, alloc *int, top bool) string {
 	if result[0] != '(' {
 		// variable
+		if top {
+			fmt.Fprintf(w, "v.Op = %s.Op\n", result)
+			fmt.Fprintf(w, "v.Aux = %s.Aux\n", result)
+			fmt.Fprintf(w, "v.resetArgs()\n", result)
+			fmt.Fprintf(w, "v.AddArgs(%s.Args...)\n", result)
+		}
 		return result
 	}
 
@@ -297,20 +303,33 @@ func split(s string) []string {
 
 outer:
 	for s != "" {
-		d := 0         // depth of ({[<
-		nonsp := false // found a non-space char so far
+		d := 0               // depth of ({[<
+		var open, close byte // opening and closing markers ({[< or )}]>
+		nonsp := false       // found a non-space char so far
 		for i := 0; i < len(s); i++ {
-			switch s[i] {
-			case '(', '{', '[', '<':
+			switch {
+			case d == 0 && s[i] == '(':
+				open, close = '(', ')'
 				d++
-			case ')', '}', ']', '>':
-				d--
-			case ' ', '\t':
-				if d == 0 && nonsp {
+			case d == 0 && s[i] == '<':
+				open, close = '<', '>'
+				d++
+			case d == 0 && s[i] == '[':
+				open, close = '[', ']'
+				d++
+			case d == 0 && s[i] == '{':
+				open, close = '{', '}'
+				d++
+			case d == 0 && (s[i] == ' ' || s[i] == '\t'):
+				if nonsp {
 					r = append(r, strings.TrimSpace(s[:i]))
 					s = s[i:]
 					continue outer
 				}
+			case d > 0 && s[i] == open:
+				d++
+			case d > 0 && s[i] == close:
+				d--
 			default:
 				nonsp = true
 			}
diff --git a/src/cmd/internal/ssa/stackalloc.go b/src/cmd/internal/ssa/stackalloc.go
index 4d0359ed81..8a315e1045 100644
--- a/src/cmd/internal/ssa/stackalloc.go
+++ b/src/cmd/internal/ssa/stackalloc.go
@@ -15,6 +15,9 @@ func stackalloc(f *Func) {
 			if v.Op != OpPhi {
 				continue
 			}
+			if v.Type.IsMemory() { // TODO: only "regallocable" types
+				continue
+			}
 			n += v.Type.Size()
 			// a := v.Type.Align()
 			// n = (n + a - 1) / a * a TODO
@@ -35,10 +38,11 @@ func stackalloc(f *Func) {
 			if v.Type.IsMemory() { // TODO: only "regallocable" types
 				continue
 			}
-			if v.Op == OpConst {
-				// don't allocate space for OpConsts. They should
-				// have been rematerialized everywhere.
-				// TODO: is this the right thing to do?
+			if len(v.Args) == 0 {
+				// v will have been materialized wherever it is needed.
+				continue
+			}
+			if len(v.Args) == 1 && (v.Args[0].Op == OpFP || v.Args[0].Op == OpSP || v.Args[0].Op == OpGlobal) {
 				continue
 			}
 			// a := v.Type.Align()
-- 
2.48.1
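A closing note on the addOff helper introduced in rewrite.go above: the guard x^y >= 0 && x^z < 0 catches signed wraparound because a two's-complement sum can only overflow when both operands share a sign and the result's sign flips. A small self-contained sketch of the same check (hypothetical names, not part of the patch; it reports failure instead of panicking):

    package main

    import (
    	"fmt"
    	"math"
    )

    // addOffChecked mirrors the overflow test used by addOff.
    func addOffChecked(x, y int64) (int64, bool) {
    	z := x + y
    	if x^y >= 0 && x^z < 0 {
    		return 0, false // same-sign operands, different-sign result: the addition wrapped
    	}
    	return z, true
    }

    func main() {
    	fmt.Println(addOffChecked(8, 16))            // 24 true
    	fmt.Println(addOffChecked(math.MaxInt64, 1)) // 0 false, wraparound caught
    	fmt.Println(addOffChecked(math.MinInt64, 5)) // -9223372036854775803 true
    }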