From: Rob Pike Date: Thu, 22 Jan 2015 18:48:33 +0000 (-0800) Subject: [dev.cc] cmd/asm: the assembler proper X-Git-Tag: go1.5beta1~1915^2~96 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=fdeee3a538e4816540e46195e7c14af2ad4ad190;p=gostls13.git [dev.cc] cmd/asm: the assembler proper Add main.go, the simple driver for the assembler, and the subdirectory internal/asm, which contains the parser and instruction generator. It's likely that much of the implementation is superstition, or at best experimental phenomenology, but it does generate working binaries. Change-Id: I322a9ae8a20174b6693153f30e39217ba68f8032 Reviewed-on: https://go-review.googlesource.com/3196 Reviewed-by: Russ Cox --- diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go new file mode 100644 index 0000000000..4ffbe558c6 --- /dev/null +++ b/src/cmd/asm/internal/asm/asm.go @@ -0,0 +1,533 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package asm + +import ( + "fmt" + "strings" + "text/scanner" + + "cmd/asm/internal/addr" + "cmd/asm/internal/arch" + "cmd/asm/internal/lex" + "cmd/internal/obj" +) + +// TODO: This package has many numeric conversions that should be unnecessary. + +// symbolType returns the extern/static etc. type appropriate for the symbol. +func (p *Parser) symbolType(a *addr.Addr) int { + switch a.Register { + case arch.RFP: + return p.arch.D_PARAM + case arch.RSP: + return p.arch.D_AUTO + case arch.RSB: + // See comment in addrToAddr. + if a.IsImmediateAddress { + return p.arch.D_ADDR + } + if a.IsStatic { + return p.arch.D_STATIC + } + return p.arch.D_EXTERN + } + p.errorf("invalid register for symbol %s", a.Symbol) + return 0 +} + +// TODO: configure the architecture + +func (p *Parser) addrToAddr(a *addr.Addr) obj.Addr { + out := p.arch.NoAddr + if a.Has(addr.Symbol) { + // How to encode the symbols: + // syntax = Typ,Index + // $a(SB) = ADDR,EXTERN + // $a<>(SB) = ADDR,STATIC + // a(SB) = EXTERN,NONE + // a<>(SB) = STATIC,NONE + // The call to symbolType does the first column; we need to fix up Index here. + out.Type = int16(p.symbolType(a)) + out.Sym = obj.Linklookup(p.linkCtxt, a.Symbol, 0) + if a.IsImmediateAddress { + // Index field says whether it's a static. + switch a.Register { + case arch.RSB: + if a.IsStatic { + out.Index = uint8(p.arch.D_STATIC) + } else { + out.Index = uint8(p.arch.D_EXTERN) + } + default: + p.errorf("can't handle immediate address of %s not (SB)\n", a.Symbol) + } + } + } else if a.Has(addr.Register) { + // TODO: SP is tricky, and this isn't good enough. + // SP = D_SP + // 4(SP) = 4(D_SP) + // x+4(SP) = D_AUTO with sym=x TODO + out.Type = a.Register + if a.Register == arch.RSP { + out.Type = int16(p.arch.SP) + } + if a.IsIndirect { + out.Type += p.arch.D_INDIR + } + // a.Register2 handled in the instruction method; it's bizarre. + } + if a.Has(addr.Index) { + out.Index = uint8(a.Index) // TODO: out.Index == p.NoArch.Index should be same type as Register. + } + if a.Has(addr.Scale) { + out.Scale = a.Scale + } + if a.Has(addr.Offset) { + out.Offset = a.Offset + if a.Is(addr.Offset) { + // RHS of MOVL $0xf1, 0xf1 // crash + out.Type = p.arch.D_INDIR + int16(p.arch.D_NONE) + } else if a.IsImmediateConstant && out.Type == int16(p.arch.D_NONE) { + out.Type = int16(p.arch.D_CONST) + } + } + if a.Has(addr.Float) { + out.U.Dval = a.Float + out.Type = int16(p.arch.D_FCONST) + } + if a.Has(addr.String) { + out.U.Sval = a.String + out.Type = int16(p.arch.D_SCONST) + } + // HACK TODO + if a.IsIndirect && !a.Has(addr.Register) && a.Has(addr.Index) { + // LHS of LEAQ 0(BX*8), CX + out.Type = p.arch.D_INDIR + int16(p.arch.D_NONE) + } + return out +} + +func (p *Parser) link(prog *obj.Prog, doLabel bool) { + if p.firstProg == nil { + p.firstProg = prog + } else { + p.lastProg.Link = prog + } + p.lastProg = prog + if doLabel { + p.pc++ + for _, label := range p.pendingLabels { + if p.labels[label] != nil { + p.errorf("label %q multiply defined", label) + } + p.labels[label] = prog + } + p.pendingLabels = p.pendingLabels[0:0] + } + prog.Pc = int64(p.pc) + fmt.Println(p.histLineNum, prog) +} + +// asmText assembles a TEXT pseudo-op. +// TEXT runtime·sigtramp(SB),4,$0-0 +func (p *Parser) asmText(word string, operands [][]lex.Token) { + if len(operands) != 3 { + p.errorf("expect three operands for TEXT") + } + + // Operand 0 is the symbol name in the form foo(SB). + // That means symbol plus indirect on SB and no offset. + nameAddr := p.address(operands[0]) + if !nameAddr.Is(addr.Symbol|addr.Register|addr.Indirect) || nameAddr.Register != arch.RSB { + p.errorf("TEXT symbol %q must be an offset from SB", nameAddr.Symbol) + } + name := strings.Replace(nameAddr.Symbol, "·", ".", 1) + + // Operand 1 is the text flag, a literal integer. + flagAddr := p.address(operands[1]) + if !flagAddr.Is(addr.Offset) { + p.errorf("TEXT flag for %s must be an integer", name) + } + flag := int8(flagAddr.Offset) + + // Operand 2 is the frame and arg size. + // Bizarre syntax: $a-b is two words, not subtraction. + // We might even see $-b, which means $0-b. Ugly. + // Assume if it has this syntax that b is a plain constant. + // Not clear we can do better, but it doesn't matter. + op := operands[2] + n := len(op) + var locals int64 + if n >= 2 && op[n-2].ScanToken == '-' && op[n-1].ScanToken == scanner.Int { + p.start(op[n-1:]) + locals = int64(p.expr()) + op = op[:n-2] + } + args := int64(0) + if len(op) == 1 && op[0].ScanToken == '$' { + // Special case for $-8. + // Done; args is zero. + } else { + argsAddr := p.address(op) + if !argsAddr.Is(addr.ImmediateConstant | addr.Offset) { + p.errorf("TEXT frame size for %s must be an immediate constant", name) + } + args = argsAddr.Offset + } + + prog := &obj.Prog{ + Ctxt: p.linkCtxt, + As: int16(p.arch.ATEXT), + Lineno: int32(p.histLineNum), + From: obj.Addr{ + Type: int16(p.symbolType(&nameAddr)), + Index: uint8(p.arch.D_NONE), + Sym: obj.Linklookup(p.linkCtxt, name, 0), + Scale: flag, + }, + To: obj.Addr{ + Index: uint8(p.arch.D_NONE), + }, + } + // Encoding of arg and locals depends on architecture. + switch p.arch.Thechar { + case '6': + prog.To.Type = int16(p.arch.D_CONST) + prog.To.Offset = (locals << 32) | args + case '8': + prog.To.Type = p.arch.D_CONST2 + prog.To.Offset = args + prog.To.Offset2 = int32(locals) + default: + p.errorf("internal error: can't encode TEXT arg/frame") + } + p.link(prog, true) +} + +// asmData assembles a DATA pseudo-op. +// DATA masks<>+0x00(SB)/4, $0x00000000 +func (p *Parser) asmData(word string, operands [][]lex.Token) { + if len(operands) != 2 { + p.errorf("expect two operands for DATA") + } + + // Operand 0 has the general form foo<>+0x04(SB)/4. + op := operands[0] + n := len(op) + if n < 3 || op[n-2].ScanToken != '/' || op[n-1].ScanToken != scanner.Int { + p.errorf("expect /size for DATA argument") + } + scale := p.scale(op[n-1].String()) + op = op[:n-2] + nameAddr := p.address(op) + ok := nameAddr.Is(addr.Symbol|addr.Register|addr.Indirect) || nameAddr.Is(addr.Symbol|addr.Register|addr.Indirect|addr.Offset) + if !ok || nameAddr.Register != arch.RSB { + p.errorf("DATA symbol %q must be an offset from SB", nameAddr.Symbol) + } + name := strings.Replace(nameAddr.Symbol, "·", ".", 1) + + // Operand 1 is an immediate constant or address. + valueAddr := p.address(operands[1]) + if !valueAddr.IsImmediateConstant && !valueAddr.IsImmediateAddress { + p.errorf("DATA value must be an immediate constant or address") + } + + // The addresses must not overlap. Easiest test: require monotonicity. + if lastAddr, ok := p.dataAddr[name]; ok && nameAddr.Offset < lastAddr { + p.errorf("overlapping DATA entry for %s", nameAddr.Symbol) + } + p.dataAddr[name] = nameAddr.Offset + int64(scale) + + prog := &obj.Prog{ + Ctxt: p.linkCtxt, + As: int16(p.arch.ADATA), + Lineno: int32(p.histLineNum), + From: obj.Addr{ + Type: int16(p.symbolType(&nameAddr)), + Index: uint8(p.arch.D_NONE), + Sym: obj.Linklookup(p.linkCtxt, name, 0), + Offset: nameAddr.Offset, + Scale: scale, + }, + To: p.addrToAddr(&valueAddr), + } + + p.link(prog, false) +} + +// asmGlobl assembles a GLOBL pseudo-op. +// GLOBL shifts<>(SB),8,$256 +// GLOBL shifts<>(SB),$256 +func (p *Parser) asmGlobl(word string, operands [][]lex.Token) { + if len(operands) != 2 && len(operands) != 3 { + p.errorf("expect two or three operands for GLOBL") + } + + // Operand 0 has the general form foo<>+0x04(SB). + nameAddr := p.address(operands[0]) + if !nameAddr.Is(addr.Symbol|addr.Register|addr.Indirect) || nameAddr.Register != arch.RSB { + p.errorf("GLOBL symbol %q must be an offset from SB", nameAddr.Symbol) + } + name := strings.Replace(nameAddr.Symbol, "·", ".", 1) + + // If three operands, middle operand is a scale. + scale := int8(0) + op := operands[1] + if len(operands) == 3 { + scaleAddr := p.address(op) + if !scaleAddr.Is(addr.Offset) { + p.errorf("GLOBL scale must be a constant") + } + scale = int8(scaleAddr.Offset) + op = operands[2] + } + + // Final operand is an immediate constant. + sizeAddr := p.address(op) + if !sizeAddr.Is(addr.ImmediateConstant | addr.Offset) { + p.errorf("GLOBL size must be an immediate constant") + } + size := sizeAddr.Offset + + // log.Printf("GLOBL %s %d, $%d", name, scale, size) + prog := &obj.Prog{ + Ctxt: p.linkCtxt, + As: int16(p.arch.AGLOBL), + Lineno: int32(p.histLineNum), + From: obj.Addr{ + Type: int16(p.symbolType(&nameAddr)), + Index: uint8(p.arch.D_NONE), + Sym: obj.Linklookup(p.linkCtxt, name, 0), + Offset: nameAddr.Offset, + Scale: scale, + }, + To: obj.Addr{ + Type: int16(p.arch.D_CONST), + Index: uint8(p.arch.D_NONE), + Offset: size, + }, + } + p.link(prog, false) +} + +// asmPCData assembles a PCDATA pseudo-op. +// PCDATA $2, $705 +func (p *Parser) asmPCData(word string, operands [][]lex.Token) { + if len(operands) != 2 { + p.errorf("expect two operands for PCDATA") + } + + // Operand 0 must be an immediate constant. + addr0 := p.address(operands[0]) + if !addr0.Is(addr.ImmediateConstant | addr.Offset) { + p.errorf("PCDATA value must be an immediate constant") + } + value0 := addr0.Offset + + // Operand 1 must be an immediate constant. + addr1 := p.address(operands[1]) + if !addr1.Is(addr.ImmediateConstant | addr.Offset) { + p.errorf("PCDATA value must be an immediate constant") + } + value1 := addr1.Offset + + // log.Printf("PCDATA $%d, $%d", value0, value1) + prog := &obj.Prog{ + Ctxt: p.linkCtxt, + As: int16(p.arch.APCDATA), + Lineno: int32(p.histLineNum), + From: obj.Addr{ + Type: int16(p.arch.D_CONST), + Index: uint8(p.arch.D_NONE), + Offset: value0, + }, + To: obj.Addr{ + Type: int16(p.arch.D_CONST), + Index: uint8(p.arch.D_NONE), + Offset: value1, + }, + } + p.link(prog, true) +} + +// asmFuncData assembles a FUNCDATA pseudo-op. +// FUNCDATA $1, funcdata<>+4(SB) +func (p *Parser) asmFuncData(word string, operands [][]lex.Token) { + if len(operands) != 2 { + p.errorf("expect two operands for FUNCDATA") + } + + // Operand 0 must be an immediate constant. + valueAddr := p.address(operands[0]) + if !valueAddr.Is(addr.ImmediateConstant | addr.Offset) { + p.errorf("FUNCDATA value must be an immediate constant") + } + value := valueAddr.Offset + + // Operand 1 is a symbol name in the form foo(SB). + // That means symbol plus indirect on SB and no offset. + nameAddr := p.address(operands[1]) + if !nameAddr.Is(addr.Symbol|addr.Register|addr.Indirect) || nameAddr.Register != arch.RSB { + p.errorf("FUNCDATA symbol %q must be an offset from SB", nameAddr.Symbol) + } + name := strings.Replace(nameAddr.Symbol, "·", ".", 1) + + // log.Printf("FUNCDATA %s, $%d", name, value) + prog := &obj.Prog{ + Ctxt: p.linkCtxt, + As: int16(p.arch.AFUNCDATA), + Lineno: int32(p.histLineNum), + From: obj.Addr{ + Type: int16(p.arch.D_CONST), + Index: uint8(p.arch.D_NONE), + Offset: value, + }, + To: obj.Addr{ + Type: int16(p.symbolType(&nameAddr)), + Index: uint8(p.arch.D_NONE), + Sym: obj.Linklookup(p.linkCtxt, name, 0), + }, + } + p.link(prog, true) +} + +// asmJump assembles a jump instruction. +// JMP R1 +// JMP exit +// JMP 3(PC) +func (p *Parser) asmJump(op int, a []addr.Addr) { + var target *addr.Addr + switch len(a) { + default: + p.errorf("jump must have one or two addresses") + case 1: + target = &a[0] + case 2: + if !a[0].Is(0) { + p.errorf("two-address jump must have empty first address") + } + target = &a[1] + } + prog := &obj.Prog{ + Ctxt: p.linkCtxt, + Lineno: int32(p.histLineNum), + As: int16(op), + From: p.arch.NoAddr, + } + switch { + case target.Is(addr.Register): + // JMP R1 + prog.To = p.addrToAddr(target) + case target.Is(addr.Symbol): + // JMP exit + targetProg := p.labels[target.Symbol] + if targetProg == nil { + p.toPatch = append(p.toPatch, Patch{prog, target.Symbol}) + } else { + p.branch(prog, targetProg) + } + case target.Is(addr.Register | addr.Indirect), target.Is(addr.Register | addr.Indirect | addr.Offset): + // JMP 4(AX) + if target.Register == arch.RPC { + prog.To = obj.Addr{ + Type: int16(p.arch.D_BRANCH), + Index: uint8(p.arch.D_NONE), + Offset: p.pc + 1 + target.Offset, // +1 because p.pc is incremented in link, below. + } + } else { + prog.To = p.addrToAddr(target) + } + case target.Is(addr.Symbol | addr.Indirect | addr.Register): + // JMP main·morestack(SB) + if target.Register != arch.RSB { + p.errorf("jmp to symbol must be SB-relative") + } + prog.To = obj.Addr{ + Type: int16(p.arch.D_BRANCH), + Sym: obj.Linklookup(p.linkCtxt, target.Symbol, 0), + Index: uint8(p.arch.D_NONE), + Offset: target.Offset, + } + default: + p.errorf("cannot assemble jump %+v", target) + } + p.link(prog, true) +} + +func (p *Parser) patch() { + for _, patch := range p.toPatch { + targetProg := p.labels[patch.label] + if targetProg == nil { + p.errorf("undefined label %s", patch.label) + } else { + p.branch(patch.prog, targetProg) + } + } +} + +func (p *Parser) branch(jmp, target *obj.Prog) { + jmp.To = obj.Addr{ + Type: int16(p.arch.D_BRANCH), + Index: uint8(p.arch.D_NONE), + } + jmp.To.U.Branch = target +} + +// asmInstruction assembles an instruction. +// MOVW R9, (R10) +func (p *Parser) asmInstruction(op int, a []addr.Addr) { + prog := &obj.Prog{ + Ctxt: p.linkCtxt, + Lineno: int32(p.histLineNum), + As: int16(op), + } + switch len(a) { + case 0: + prog.From = p.arch.NoAddr + prog.To = p.arch.NoAddr + case 1: + if p.arch.UnaryDestination[op] { + prog.From = p.arch.NoAddr + prog.To = p.addrToAddr(&a[0]) + } else { + prog.From = p.addrToAddr(&a[0]) + prog.To = p.arch.NoAddr + } + case 2: + prog.From = p.addrToAddr(&a[0]) + prog.To = p.addrToAddr(&a[1]) + // DX:AX as a register pair can only appear on the RHS. + // Bizarrely, to obj it's specified by setting index on the LHS. + // TODO: can we fix this? + if a[1].Has(addr.Register2) { + if int(prog.From.Index) != p.arch.D_NONE { + p.errorf("register pair operand on RHS must have register on LHS") + } + prog.From.Index = uint8(a[1].Register2) + } + case 3: + // CMPSD etc.; third operand is imm8, stored in offset, or a register. + prog.From = p.addrToAddr(&a[0]) + prog.To = p.addrToAddr(&a[1]) + switch { + case a[2].Is(addr.Offset): + prog.To.Offset = a[2].Offset + case a[2].Is(addr.Register): + // Strange reodering. + prog.To = p.addrToAddr(&a[2]) + prog.From = p.addrToAddr(&a[1]) + if !a[0].IsImmediateConstant { + p.errorf("expected $value for 1st operand") + } + prog.To.Offset = a[0].Offset + default: + p.errorf("expected offset or register for 3rd operand") + } + default: + p.errorf("can't handle instruction with %d operands", len(a)) + } + p.link(prog, true) +} diff --git a/src/cmd/asm/internal/asm/overflow.go b/src/cmd/asm/internal/asm/overflow.go new file mode 100644 index 0000000000..9e03e7acbc --- /dev/null +++ b/src/cmd/asm/internal/asm/overflow.go @@ -0,0 +1,94 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package asm + +/* + Tested with uint8s like this: + + for a := 0; a <= 255; a++ { + for b := 0; b <= 127; b++ { + ovfl := a+b != int(uint8(a)+uint8(b)) + if addOverflows(uint8(a), uint8(b)) != ovfl { + fmt.Printf("%d+%d fails\n", a, b) + break + } + } + } + for a := 0; a <= 255; a++ { + for b := 0; b <= 127; b++ { + ovfl := a-b != int(uint8(a)-uint8(b)) + if subOverflows(uint8(a), uint8(b)) != ovfl { + fmt.Printf("%d-%d fails\n", a, b) + break + } + } + } + for a := 0; a <= 255; a++ { + for b := 0; b <= 255; b++ { + ovfl := a*b != int(uint8(a)*uint8(b)) + if mulOverflows(uint8(a), uint8(b)) != ovfl { + fmt.Printf("%d*%d fails\n", a, b) + } + } + } +*/ + +func addOverflows(a, b uint64) bool { + return a+b < a +} + +func subOverflows(a, b uint64) bool { + return a-b > a +} + +func mulOverflows(a, b uint64) bool { + if a <= 1 || b <= 1 { + return false + } + c := a * b + return c/b != a +} + +/* +For the record, signed overflow: + +const mostNegative = -(mostPositive + 1) +const mostPositive = 1<<63 - 1 + +func signedAddOverflows(a, b int64) bool { + if (a >= 0) != (b >= 0) { + // Different signs cannot overflow. + return false + } + if a >= 0 { + // Both are positive. + return a+b < 0 + } + return a+b >= 0 +} + +func signedSubOverflows(a, b int64) bool { + if (a >= 0) == (b >= 0) { + // Same signs cannot overflow. + return false + } + if a >= 0 { + // a positive, b negative. + return a-b < 0 + } + return a-b >= 0 +} + +func signedMulOverflows(a, b int64) bool { + if a == 0 || b == 0 || a == 1 || b == 1 { + return false + } + if a == mostNegative || b == mostNegative { + return true + } + c := a * b + return c/b != a +} +*/ diff --git a/src/cmd/asm/internal/asm/parse.go b/src/cmd/asm/internal/asm/parse.go new file mode 100644 index 0000000000..377f2ac121 --- /dev/null +++ b/src/cmd/asm/internal/asm/parse.go @@ -0,0 +1,527 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package asm implements the parser and instruction generator for the assembler. +// TODO: Split apart? +package asm + +import ( + "fmt" + "log" + "os" + "strconv" + "text/scanner" + + "cmd/asm/internal/addr" + "cmd/asm/internal/arch" + "cmd/asm/internal/lex" + "cmd/internal/obj" +) + +type Parser struct { + lex lex.TokenReader + lineNum int // Line number in source file. + histLineNum int // Cumulative line number across source files. + errorLine int // (Cumulative) line number of last error. + errorCount int // Number of errors. + pc int64 // virtual PC; count of Progs; doesn't advance for GLOBL or DATA. + input []lex.Token + inputPos int + pendingLabels []string // Labels to attach to next instruction. + labels map[string]*obj.Prog + toPatch []Patch + addr []addr.Addr + arch *arch.Arch + linkCtxt *obj.Link + firstProg *obj.Prog + lastProg *obj.Prog + dataAddr map[string]int64 // Most recent address for DATA for this symbol. +} + +type Patch struct { + prog *obj.Prog + label string +} + +func NewParser(ctxt *obj.Link, ar *arch.Arch, lexer lex.TokenReader) *Parser { + return &Parser{ + linkCtxt: ctxt, + arch: ar, + lex: lexer, + labels: make(map[string]*obj.Prog), + dataAddr: make(map[string]int64), + } +} + +func (p *Parser) errorf(format string, args ...interface{}) { + if p.histLineNum == p.errorLine { + // Only one error per line. + return + } + p.errorLine = p.histLineNum + // Put file and line information on head of message. + format = "%s:%d: " + format + "\n" + args = append([]interface{}{p.lex.File(), p.lineNum}, args...) + fmt.Fprintf(os.Stderr, format, args...) + p.errorCount++ + if p.errorCount > 10 { + log.Fatal("too many errors") + } +} + +func (p *Parser) Parse() (*obj.Prog, bool) { + for p.line() { + } + if p.errorCount > 0 { + return nil, false + } + p.patch() + return p.firstProg, true +} + +// WORD op {, op} '\n' +func (p *Parser) line() bool { + // Skip newlines. + var tok lex.ScanToken + for { + tok = p.lex.Next() + // We save the line number here so error messages from this instruction + // are labeled with this line. Otherwise we complain after we've absorbed + // the terminating newline and the line numbers are off by one in errors. + p.lineNum = p.lex.Line() + p.histLineNum = lex.HistLine() + switch tok { + case '\n': + continue + case scanner.EOF: + return false + } + break + } + // First item must be an identifier. + if tok != scanner.Ident { + p.errorf("expected identifier, found %q", p.lex.Text()) + return false // Might as well stop now. + } + word := p.lex.Text() + operands := make([][]lex.Token, 0, 3) + // Zero or more comma-separated operands, one per loop. + first := true // Permit ':' to define this as a label. + for tok != '\n' && tok != ';' { + // Process one operand. + items := make([]lex.Token, 0, 3) + for { + tok = p.lex.Next() + if first { + if tok == ':' { + p.pendingLabels = append(p.pendingLabels, word) + return true + } + first = false + } + if tok == scanner.EOF { + p.errorf("unexpected EOF") + return false + } + if tok == '\n' || tok == ';' || tok == ',' { + break + } + items = append(items, lex.Make(tok, p.lex.Text())) + } + if len(items) > 0 { + operands = append(operands, items) + } else if len(operands) > 0 { + // Had a comma but nothing after. + p.errorf("missing operand") + } + } + i := p.arch.Pseudos[word] + if i != 0 { + p.pseudo(i, word, operands) + return true + } + i = p.arch.Instructions[word] + if i != 0 { + p.instruction(i, word, operands) + return true + } + p.errorf("unrecognized instruction %s", word) + return true +} + +func (p *Parser) instruction(op int, word string, operands [][]lex.Token) { + p.addr = p.addr[0:0] + for _, op := range operands { + p.addr = append(p.addr, p.address(op)) + } + // Is it a jump? TODO + if word[0] == 'J' || word == "CALL" { + p.asmJump(op, p.addr) + return + } + p.asmInstruction(op, p.addr) +} + +func (p *Parser) pseudo(op int, word string, operands [][]lex.Token) { + switch op { + case p.arch.ATEXT: + p.asmText(word, operands) + case p.arch.ADATA: + p.asmData(word, operands) + case p.arch.AGLOBL: + p.asmGlobl(word, operands) + case p.arch.APCDATA: + p.asmPCData(word, operands) + case p.arch.AFUNCDATA: + p.asmFuncData(word, operands) + default: + p.errorf("unimplemented: %s", word) + } +} + +func (p *Parser) start(operand []lex.Token) { + p.input = operand + p.inputPos = 0 +} + +// address parses the operand into a link address structure. +func (p *Parser) address(operand []lex.Token) addr.Addr { + p.start(operand) + addr := addr.Addr{} + p.operand(&addr) + return addr +} + +// parse (R). The opening paren is known to be there. +// The return value states whether it was a scaled mode. +func (p *Parser) parenRegister(a *addr.Addr) bool { + p.next() + tok := p.next() + if tok.ScanToken != scanner.Ident { + p.errorf("expected register, got %s", tok) + } + r, present := p.arch.Registers[tok.String()] + if !present { + p.errorf("expected register, found %s", tok.String()) + } + a.IsIndirect = true + scaled := p.peek() == '*' + if scaled { + // (R*2) + p.next() + tok := p.get(scanner.Int) + a.Scale = p.scale(tok.String()) + a.Index = int16(r) // TODO: r should have type int16 but is uint8. + } else { + if a.HasRegister { + p.errorf("multiple indirections") + } + a.HasRegister = true + a.Register = int16(r) + } + p.expect(')') + p.next() + return scaled +} + +// scale converts a decimal string into a valid scale factor. +func (p *Parser) scale(s string) int8 { + switch s { + case "1", "2", "4", "8": + return int8(s[0] - '0') + } + p.errorf("bad scale: %s", s) + return 0 +} + +// parse (R) or (R)(R*scale). The opening paren is known to be there. +func (p *Parser) addressMode(a *addr.Addr) { + scaled := p.parenRegister(a) + if !scaled && p.peek() == '(' { + p.parenRegister(a) + } +} + +// operand parses a general operand and stores the result in *a. +func (p *Parser) operand(a *addr.Addr) bool { + if len(p.input) == 0 { + p.errorf("empty operand: cannot happen") + return false + } + switch p.peek() { + case '$': + p.next() + switch p.peek() { + case scanner.Ident: + a.IsImmediateAddress = true + p.operand(a) // TODO + case scanner.String: + a.IsImmediateConstant = true + a.HasString = true + a.String = p.atos(p.next().String()) + case scanner.Int, scanner.Float, '+', '-', '~', '(': + a.IsImmediateConstant = true + if p.have(scanner.Float) { + a.HasFloat = true + a.Float = p.floatExpr() + } else { + a.HasOffset = true + a.Offset = int64(p.expr()) + } + default: + p.errorf("illegal %s in immediate operand", p.next().String()) + } + case '*': + p.next() + tok := p.next() + r, present := p.arch.Registers[tok.String()] + if !present { + p.errorf("expected register; got %s", tok.String()) + } + a.HasRegister = true + a.Register = int16(r) + case '(': + p.next() + if p.peek() == scanner.Ident { + p.back() + p.addressMode(a) + break + } + p.back() + fallthrough + case '+', '-', '~', scanner.Int, scanner.Float: + if p.have(scanner.Float) { + a.HasFloat = true + a.Float = p.floatExpr() + } else { + a.HasOffset = true + a.Offset = int64(p.expr()) + } + if p.peek() != scanner.EOF { + p.expect('(') + p.addressMode(a) + } + case scanner.Ident: + tok := p.next() + // Either R or (most general) ident<>+4(SB)(R*scale). + if r, present := p.arch.Registers[tok.String()]; present { + a.HasRegister = true + a.Register = int16(r) + // Possibly register pair: DX:AX. + if p.peek() == ':' { + p.next() + tok = p.get(scanner.Ident) + a.HasRegister2 = true + a.Register2 = int16(p.arch.Registers[tok.String()]) + } + break + } + // Weirdness with statics: Might now have "<>". + if p.peek() == '<' { + p.next() + p.get('>') + a.IsStatic = true + } + if p.peek() == '+' || p.peek() == '-' { + a.HasOffset = true + a.Offset = int64(p.expr()) + } + a.Symbol = tok.String() + if p.peek() == scanner.EOF { + break + } + // Expect (SB) or (FP) + p.expect('(') + p.parenRegister(a) + if a.Register != arch.RSB && a.Register != arch.RFP && a.Register != arch.RSP { + p.errorf("expected SB, FP, or SP offset for %s", tok) + } + // Possibly have scaled register (CX*8). + if p.peek() != scanner.EOF { + p.expect('(') + p.addressMode(a) + } + default: + p.errorf("unexpected %s in operand", p.next()) + } + p.expect(scanner.EOF) + return true +} + +// expr = term | term '+' term +func (p *Parser) expr() uint64 { + value := p.term() + for { + switch p.peek() { + case '+': + p.next() + x := p.term() + if addOverflows(x, value) { + p.errorf("overflow in %d+%d", value, x) + } + value += x + case '-': + p.next() + value -= p.term() + case '|': + p.next() + value |= p.term() + case '^': + p.next() + value ^= p.term() + default: + return value + } + } +} + +// floatExpr = fconst | '-' floatExpr | '+' floatExpr | '(' floatExpr ')' +func (p *Parser) floatExpr() float64 { + tok := p.next() + switch tok.ScanToken { + case '(': + v := p.floatExpr() + if p.next().ScanToken != ')' { + p.errorf("missing closing paren") + } + return v + case '+': + return +p.floatExpr() + case '-': + return -p.floatExpr() + case scanner.Float: + return p.atof(tok.String()) + } + p.errorf("unexpected %s evaluating float expression", tok) + return 0 +} + +// term = const | term '*' term | '(' expr ')' +func (p *Parser) term() uint64 { + tok := p.next() + switch tok.ScanToken { + case '(': + v := p.expr() + if p.next().ScanToken != ')' { + p.errorf("missing closing paren") + } + return v + case '+': + return +p.term() + case '-': + return -p.term() + case '~': + return ^p.term() + case scanner.Int: + value := p.atoi(tok.String()) + for { + switch p.peek() { + case '*': + p.next() + value *= p.term() // OVERFLOW? + case '/': + p.next() + value /= p.term() + case '%': + p.next() + value %= p.term() + case lex.LSH: + p.next() + shift := p.term() + if shift < 0 { + p.errorf("negative left shift %d", shift) + } + value <<= uint(shift) + case lex.RSH: + p.next() + shift := p.term() + if shift < 0 { + p.errorf("negative right shift %d", shift) + } + value >>= uint(shift) + case '&': + p.next() + value &= p.term() + default: + return value + } + } + } + p.errorf("unexpected %s evaluating expression", tok) + return 0 +} + +func (p *Parser) atoi(str string) uint64 { + value, err := strconv.ParseUint(str, 0, 64) + if err != nil { + p.errorf("%s", err) + } + return value +} + +func (p *Parser) atof(str string) float64 { + value, err := strconv.ParseFloat(str, 64) + if err != nil { + p.errorf("%s", err) + } + return value +} + +func (p *Parser) atos(str string) string { + value, err := strconv.Unquote(str) + if err != nil { + p.errorf("%s", err) + } + return value +} + +// EOF represents the end of input. +var EOF = lex.Make(scanner.EOF, "EOF") + +func (p *Parser) next() lex.Token { + if !p.more() { + return EOF + } + tok := p.input[p.inputPos] + p.inputPos++ + return tok +} + +func (p *Parser) back() { + p.inputPos-- +} + +func (p *Parser) peek() lex.ScanToken { + if p.more() { + return p.input[p.inputPos].ScanToken + } + return scanner.EOF +} + +func (p *Parser) more() bool { + return p.inputPos < len(p.input) +} + +// get verifies that the next item has the expected type and returns it. +func (p *Parser) get(expected lex.ScanToken) lex.Token { + p.expect(expected) + return p.next() +} + +// expect verifies that the next item has the expected type. It does not consume it. +func (p *Parser) expect(expected lex.ScanToken) { + if p.peek() != expected { + p.errorf("expected %s, found %s", expected, p.next()) + } +} + +// have reports whether the remaining tokens contain the specified token. +func (p *Parser) have(token lex.ScanToken) bool { + for i := p.inputPos; i < len(p.input); i++ { + if p.input[i].ScanToken == token { + return true + } + } + return false +} diff --git a/src/cmd/asm/main.go b/src/cmd/asm/main.go new file mode 100644 index 0000000000..08354119a6 --- /dev/null +++ b/src/cmd/asm/main.go @@ -0,0 +1,63 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import ( + "flag" + "fmt" + "go/build" + "log" + "os" + + "cmd/asm/internal/arch" + "cmd/asm/internal/asm" + "cmd/asm/internal/flags" + "cmd/asm/internal/lex" + + "cmd/internal/obj" +) + +func main() { + log.SetFlags(0) + log.SetPrefix("asm: ") + + GOARCH := build.Default.GOARCH + + architecture := arch.Set(GOARCH) + if architecture == nil { + log.Fatalf("asm: unrecognized architecture %s", GOARCH) + } + + // Is this right? + flags.Parse(build.Default.GOROOT, build.Default.GOOS, GOARCH, architecture.Thechar) + + // Create object file, write header. + fd, err := os.Create(*flags.OutputFile) + if err != nil { + log.Fatal(err) + } + ctxt := obj.Linknew(architecture.LinkArch) + if *flags.PrintOut { + ctxt.Debugasm = 1 + } + ctxt.Bso = obj.Binitw(os.Stdout) + defer obj.Bflush(ctxt.Bso) + ctxt.Diag = log.Fatalf + output := obj.Binitw(fd) + fmt.Fprintf(output, "go object %s %s %s\n", obj.Getgoos(), obj.Getgoarch(), obj.Getgoversion()) + fmt.Fprintf(output, "!\n") + + lexer := lex.NewLexer(flag.Arg(0), ctxt) + parser := asm.NewParser(ctxt, architecture, lexer) + pList := obj.Linknewplist(ctxt) + var ok bool + pList.Firstpc, ok = parser.Parse() + if !ok { + log.Print("FAIL TODO") + os.Exit(1) + } + obj.Writeobjdirect(ctxt, output) + obj.Bflush(output) +}