]> Cypherpunks repositories - gostls13.git/commitdiff
[dev.cc] cmd/asm: the assembler proper
authorRob Pike <r@golang.org>
Thu, 22 Jan 2015 18:48:33 +0000 (10:48 -0800)
committerRob Pike <r@golang.org>
Fri, 23 Jan 2015 04:54:05 +0000 (04:54 +0000)
Add main.go, the simple driver for the assembler, and the
subdirectory internal/asm, which contains the parser and
instruction generator.

It's likely that much of the implementation is superstition,
or at best experimental phenomenology, but it does generate
working binaries.

Change-Id: I322a9ae8a20174b6693153f30e39217ba68f8032
Reviewed-on: https://go-review.googlesource.com/3196
Reviewed-by: Russ Cox <rsc@golang.org>
src/cmd/asm/internal/asm/asm.go [new file with mode: 0644]
src/cmd/asm/internal/asm/overflow.go [new file with mode: 0644]
src/cmd/asm/internal/asm/parse.go [new file with mode: 0644]
src/cmd/asm/main.go [new file with mode: 0644]

diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go
new file mode 100644 (file)
index 0000000..4ffbe55
--- /dev/null
@@ -0,0 +1,533 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package asm
+
+import (
+       "fmt"
+       "strings"
+       "text/scanner"
+
+       "cmd/asm/internal/addr"
+       "cmd/asm/internal/arch"
+       "cmd/asm/internal/lex"
+       "cmd/internal/obj"
+)
+
+// TODO: This package has many numeric conversions that should be unnecessary.
+
+// symbolType returns the extern/static etc. type appropriate for the symbol.
+func (p *Parser) symbolType(a *addr.Addr) int {
+       switch a.Register {
+       case arch.RFP:
+               return p.arch.D_PARAM
+       case arch.RSP:
+               return p.arch.D_AUTO
+       case arch.RSB:
+               // See comment in addrToAddr.
+               if a.IsImmediateAddress {
+                       return p.arch.D_ADDR
+               }
+               if a.IsStatic {
+                       return p.arch.D_STATIC
+               }
+               return p.arch.D_EXTERN
+       }
+       p.errorf("invalid register for symbol %s", a.Symbol)
+       return 0
+}
+
+// TODO: configure the architecture
+
+func (p *Parser) addrToAddr(a *addr.Addr) obj.Addr {
+       out := p.arch.NoAddr
+       if a.Has(addr.Symbol) {
+               // How to encode the symbols:
+               // syntax = Typ,Index
+               // $a(SB) = ADDR,EXTERN
+               // $a<>(SB) = ADDR,STATIC
+               // a(SB) = EXTERN,NONE
+               // a<>(SB) = STATIC,NONE
+               // The call to symbolType does the first column; we need to fix up Index here.
+               out.Type = int16(p.symbolType(a))
+               out.Sym = obj.Linklookup(p.linkCtxt, a.Symbol, 0)
+               if a.IsImmediateAddress {
+                       // Index field says whether it's a static.
+                       switch a.Register {
+                       case arch.RSB:
+                               if a.IsStatic {
+                                       out.Index = uint8(p.arch.D_STATIC)
+                               } else {
+                                       out.Index = uint8(p.arch.D_EXTERN)
+                               }
+                       default:
+                               p.errorf("can't handle immediate address of %s not (SB)\n", a.Symbol)
+                       }
+               }
+       } else if a.Has(addr.Register) {
+               // TODO: SP is tricky, and this isn't good enough.
+               // SP = D_SP
+               // 4(SP) = 4(D_SP)
+               // x+4(SP) = D_AUTO with sym=x TODO
+               out.Type = a.Register
+               if a.Register == arch.RSP {
+                       out.Type = int16(p.arch.SP)
+               }
+               if a.IsIndirect {
+                       out.Type += p.arch.D_INDIR
+               }
+               // a.Register2 handled in the instruction method; it's bizarre.
+       }
+       if a.Has(addr.Index) {
+               out.Index = uint8(a.Index) // TODO: out.Index == p.NoArch.Index should be same type as Register.
+       }
+       if a.Has(addr.Scale) {
+               out.Scale = a.Scale
+       }
+       if a.Has(addr.Offset) {
+               out.Offset = a.Offset
+               if a.Is(addr.Offset) {
+                       // RHS of MOVL $0xf1, 0xf1  // crash
+                       out.Type = p.arch.D_INDIR + int16(p.arch.D_NONE)
+               } else if a.IsImmediateConstant && out.Type == int16(p.arch.D_NONE) {
+                       out.Type = int16(p.arch.D_CONST)
+               }
+       }
+       if a.Has(addr.Float) {
+               out.U.Dval = a.Float
+               out.Type = int16(p.arch.D_FCONST)
+       }
+       if a.Has(addr.String) {
+               out.U.Sval = a.String
+               out.Type = int16(p.arch.D_SCONST)
+       }
+       // HACK TODO
+       if a.IsIndirect && !a.Has(addr.Register) && a.Has(addr.Index) {
+               // LHS of LEAQ  0(BX*8), CX
+               out.Type = p.arch.D_INDIR + int16(p.arch.D_NONE)
+       }
+       return out
+}
+
+func (p *Parser) link(prog *obj.Prog, doLabel bool) {
+       if p.firstProg == nil {
+               p.firstProg = prog
+       } else {
+               p.lastProg.Link = prog
+       }
+       p.lastProg = prog
+       if doLabel {
+               p.pc++
+               for _, label := range p.pendingLabels {
+                       if p.labels[label] != nil {
+                               p.errorf("label %q multiply defined", label)
+                       }
+                       p.labels[label] = prog
+               }
+               p.pendingLabels = p.pendingLabels[0:0]
+       }
+       prog.Pc = int64(p.pc)
+       fmt.Println(p.histLineNum, prog)
+}
+
+// asmText assembles a TEXT pseudo-op.
+// TEXT runtime·sigtramp(SB),4,$0-0
+func (p *Parser) asmText(word string, operands [][]lex.Token) {
+       if len(operands) != 3 {
+               p.errorf("expect three operands for TEXT")
+       }
+
+       // Operand 0 is the symbol name in the form foo(SB).
+       // That means symbol plus indirect on SB and no offset.
+       nameAddr := p.address(operands[0])
+       if !nameAddr.Is(addr.Symbol|addr.Register|addr.Indirect) || nameAddr.Register != arch.RSB {
+               p.errorf("TEXT symbol %q must be an offset from SB", nameAddr.Symbol)
+       }
+       name := strings.Replace(nameAddr.Symbol, "·", ".", 1)
+
+       // Operand 1 is the text flag, a literal integer.
+       flagAddr := p.address(operands[1])
+       if !flagAddr.Is(addr.Offset) {
+               p.errorf("TEXT flag for %s must be an integer", name)
+       }
+       flag := int8(flagAddr.Offset)
+
+       // Operand 2 is the frame and arg size.
+       // Bizarre syntax: $a-b is two words, not subtraction.
+       // We might even see $-b, which means $0-b. Ugly.
+       // Assume if it has this syntax that b is a plain constant.
+       // Not clear we can do better, but it doesn't matter.
+       op := operands[2]
+       n := len(op)
+       var locals int64
+       if n >= 2 && op[n-2].ScanToken == '-' && op[n-1].ScanToken == scanner.Int {
+               p.start(op[n-1:])
+               locals = int64(p.expr())
+               op = op[:n-2]
+       }
+       args := int64(0)
+       if len(op) == 1 && op[0].ScanToken == '$' {
+               // Special case for $-8.
+               // Done; args is zero.
+       } else {
+               argsAddr := p.address(op)
+               if !argsAddr.Is(addr.ImmediateConstant | addr.Offset) {
+                       p.errorf("TEXT frame size for %s must be an immediate constant", name)
+               }
+               args = argsAddr.Offset
+       }
+
+       prog := &obj.Prog{
+               Ctxt:   p.linkCtxt,
+               As:     int16(p.arch.ATEXT),
+               Lineno: int32(p.histLineNum),
+               From: obj.Addr{
+                       Type:  int16(p.symbolType(&nameAddr)),
+                       Index: uint8(p.arch.D_NONE),
+                       Sym:   obj.Linklookup(p.linkCtxt, name, 0),
+                       Scale: flag,
+               },
+               To: obj.Addr{
+                       Index: uint8(p.arch.D_NONE),
+               },
+       }
+       // Encoding of arg and locals depends on architecture.
+       switch p.arch.Thechar {
+       case '6':
+               prog.To.Type = int16(p.arch.D_CONST)
+               prog.To.Offset = (locals << 32) | args
+       case '8':
+               prog.To.Type = p.arch.D_CONST2
+               prog.To.Offset = args
+               prog.To.Offset2 = int32(locals)
+       default:
+               p.errorf("internal error: can't encode TEXT arg/frame")
+       }
+       p.link(prog, true)
+}
+
+// asmData assembles a DATA pseudo-op.
+// DATA masks<>+0x00(SB)/4, $0x00000000
+func (p *Parser) asmData(word string, operands [][]lex.Token) {
+       if len(operands) != 2 {
+               p.errorf("expect two operands for DATA")
+       }
+
+       // Operand 0 has the general form foo<>+0x04(SB)/4.
+       op := operands[0]
+       n := len(op)
+       if n < 3 || op[n-2].ScanToken != '/' || op[n-1].ScanToken != scanner.Int {
+               p.errorf("expect /size for DATA argument")
+       }
+       scale := p.scale(op[n-1].String())
+       op = op[:n-2]
+       nameAddr := p.address(op)
+       ok := nameAddr.Is(addr.Symbol|addr.Register|addr.Indirect) || nameAddr.Is(addr.Symbol|addr.Register|addr.Indirect|addr.Offset)
+       if !ok || nameAddr.Register != arch.RSB {
+               p.errorf("DATA symbol %q must be an offset from SB", nameAddr.Symbol)
+       }
+       name := strings.Replace(nameAddr.Symbol, "·", ".", 1)
+
+       // Operand 1 is an immediate constant or address.
+       valueAddr := p.address(operands[1])
+       if !valueAddr.IsImmediateConstant && !valueAddr.IsImmediateAddress {
+               p.errorf("DATA value must be an immediate constant or address")
+       }
+
+       // The addresses must not overlap. Easiest test: require monotonicity.
+       if lastAddr, ok := p.dataAddr[name]; ok && nameAddr.Offset < lastAddr {
+               p.errorf("overlapping DATA entry for %s", nameAddr.Symbol)
+       }
+       p.dataAddr[name] = nameAddr.Offset + int64(scale)
+
+       prog := &obj.Prog{
+               Ctxt:   p.linkCtxt,
+               As:     int16(p.arch.ADATA),
+               Lineno: int32(p.histLineNum),
+               From: obj.Addr{
+                       Type:   int16(p.symbolType(&nameAddr)),
+                       Index:  uint8(p.arch.D_NONE),
+                       Sym:    obj.Linklookup(p.linkCtxt, name, 0),
+                       Offset: nameAddr.Offset,
+                       Scale:  scale,
+               },
+               To: p.addrToAddr(&valueAddr),
+       }
+
+       p.link(prog, false)
+}
+
+// asmGlobl assembles a GLOBL pseudo-op.
+// GLOBL shifts<>(SB),8,$256
+// GLOBL shifts<>(SB),$256
+func (p *Parser) asmGlobl(word string, operands [][]lex.Token) {
+       if len(operands) != 2 && len(operands) != 3 {
+               p.errorf("expect two or three operands for GLOBL")
+       }
+
+       // Operand 0 has the general form foo<>+0x04(SB).
+       nameAddr := p.address(operands[0])
+       if !nameAddr.Is(addr.Symbol|addr.Register|addr.Indirect) || nameAddr.Register != arch.RSB {
+               p.errorf("GLOBL symbol %q must be an offset from SB", nameAddr.Symbol)
+       }
+       name := strings.Replace(nameAddr.Symbol, "·", ".", 1)
+
+       // If three operands, middle operand is a scale.
+       scale := int8(0)
+       op := operands[1]
+       if len(operands) == 3 {
+               scaleAddr := p.address(op)
+               if !scaleAddr.Is(addr.Offset) {
+                       p.errorf("GLOBL scale must be a constant")
+               }
+               scale = int8(scaleAddr.Offset)
+               op = operands[2]
+       }
+
+       // Final operand is an immediate constant.
+       sizeAddr := p.address(op)
+       if !sizeAddr.Is(addr.ImmediateConstant | addr.Offset) {
+               p.errorf("GLOBL size must be an immediate constant")
+       }
+       size := sizeAddr.Offset
+
+       // log.Printf("GLOBL %s %d, $%d", name, scale, size)
+       prog := &obj.Prog{
+               Ctxt:   p.linkCtxt,
+               As:     int16(p.arch.AGLOBL),
+               Lineno: int32(p.histLineNum),
+               From: obj.Addr{
+                       Type:   int16(p.symbolType(&nameAddr)),
+                       Index:  uint8(p.arch.D_NONE),
+                       Sym:    obj.Linklookup(p.linkCtxt, name, 0),
+                       Offset: nameAddr.Offset,
+                       Scale:  scale,
+               },
+               To: obj.Addr{
+                       Type:   int16(p.arch.D_CONST),
+                       Index:  uint8(p.arch.D_NONE),
+                       Offset: size,
+               },
+       }
+       p.link(prog, false)
+}
+
+// asmPCData assembles a PCDATA pseudo-op.
+// PCDATA $2, $705
+func (p *Parser) asmPCData(word string, operands [][]lex.Token) {
+       if len(operands) != 2 {
+               p.errorf("expect two operands for PCDATA")
+       }
+
+       // Operand 0 must be an immediate constant.
+       addr0 := p.address(operands[0])
+       if !addr0.Is(addr.ImmediateConstant | addr.Offset) {
+               p.errorf("PCDATA value must be an immediate constant")
+       }
+       value0 := addr0.Offset
+
+       // Operand 1 must be an immediate constant.
+       addr1 := p.address(operands[1])
+       if !addr1.Is(addr.ImmediateConstant | addr.Offset) {
+               p.errorf("PCDATA value must be an immediate constant")
+       }
+       value1 := addr1.Offset
+
+       // log.Printf("PCDATA $%d, $%d", value0, value1)
+       prog := &obj.Prog{
+               Ctxt:   p.linkCtxt,
+               As:     int16(p.arch.APCDATA),
+               Lineno: int32(p.histLineNum),
+               From: obj.Addr{
+                       Type:   int16(p.arch.D_CONST),
+                       Index:  uint8(p.arch.D_NONE),
+                       Offset: value0,
+               },
+               To: obj.Addr{
+                       Type:   int16(p.arch.D_CONST),
+                       Index:  uint8(p.arch.D_NONE),
+                       Offset: value1,
+               },
+       }
+       p.link(prog, true)
+}
+
+// asmFuncData assembles a FUNCDATA pseudo-op.
+// FUNCDATA $1, funcdata<>+4(SB)
+func (p *Parser) asmFuncData(word string, operands [][]lex.Token) {
+       if len(operands) != 2 {
+               p.errorf("expect two operands for FUNCDATA")
+       }
+
+       // Operand 0 must be an immediate constant.
+       valueAddr := p.address(operands[0])
+       if !valueAddr.Is(addr.ImmediateConstant | addr.Offset) {
+               p.errorf("FUNCDATA value must be an immediate constant")
+       }
+       value := valueAddr.Offset
+
+       // Operand 1 is a symbol name in the form foo(SB).
+       // That means symbol plus indirect on SB and no offset.
+       nameAddr := p.address(operands[1])
+       if !nameAddr.Is(addr.Symbol|addr.Register|addr.Indirect) || nameAddr.Register != arch.RSB {
+               p.errorf("FUNCDATA symbol %q must be an offset from SB", nameAddr.Symbol)
+       }
+       name := strings.Replace(nameAddr.Symbol, "·", ".", 1)
+
+       // log.Printf("FUNCDATA %s, $%d", name, value)
+       prog := &obj.Prog{
+               Ctxt:   p.linkCtxt,
+               As:     int16(p.arch.AFUNCDATA),
+               Lineno: int32(p.histLineNum),
+               From: obj.Addr{
+                       Type:   int16(p.arch.D_CONST),
+                       Index:  uint8(p.arch.D_NONE),
+                       Offset: value,
+               },
+               To: obj.Addr{
+                       Type:  int16(p.symbolType(&nameAddr)),
+                       Index: uint8(p.arch.D_NONE),
+                       Sym:   obj.Linklookup(p.linkCtxt, name, 0),
+               },
+       }
+       p.link(prog, true)
+}
+
+// asmJump assembles a jump instruction.
+// JMP R1
+// JMP exit
+// JMP 3(PC)
+func (p *Parser) asmJump(op int, a []addr.Addr) {
+       var target *addr.Addr
+       switch len(a) {
+       default:
+               p.errorf("jump must have one or two addresses")
+       case 1:
+               target = &a[0]
+       case 2:
+               if !a[0].Is(0) {
+                       p.errorf("two-address jump must have empty first address")
+               }
+               target = &a[1]
+       }
+       prog := &obj.Prog{
+               Ctxt:   p.linkCtxt,
+               Lineno: int32(p.histLineNum),
+               As:     int16(op),
+               From:   p.arch.NoAddr,
+       }
+       switch {
+       case target.Is(addr.Register):
+               // JMP R1
+               prog.To = p.addrToAddr(target)
+       case target.Is(addr.Symbol):
+               // JMP exit
+               targetProg := p.labels[target.Symbol]
+               if targetProg == nil {
+                       p.toPatch = append(p.toPatch, Patch{prog, target.Symbol})
+               } else {
+                       p.branch(prog, targetProg)
+               }
+       case target.Is(addr.Register | addr.Indirect), target.Is(addr.Register | addr.Indirect | addr.Offset):
+               // JMP 4(AX)
+               if target.Register == arch.RPC {
+                       prog.To = obj.Addr{
+                               Type:   int16(p.arch.D_BRANCH),
+                               Index:  uint8(p.arch.D_NONE),
+                               Offset: p.pc + 1 + target.Offset, // +1 because p.pc is incremented in link, below.
+                       }
+               } else {
+                       prog.To = p.addrToAddr(target)
+               }
+       case target.Is(addr.Symbol | addr.Indirect | addr.Register):
+               // JMP main·morestack(SB)
+               if target.Register != arch.RSB {
+                       p.errorf("jmp to symbol must be SB-relative")
+               }
+               prog.To = obj.Addr{
+                       Type:   int16(p.arch.D_BRANCH),
+                       Sym:    obj.Linklookup(p.linkCtxt, target.Symbol, 0),
+                       Index:  uint8(p.arch.D_NONE),
+                       Offset: target.Offset,
+               }
+       default:
+               p.errorf("cannot assemble jump %+v", target)
+       }
+       p.link(prog, true)
+}
+
+func (p *Parser) patch() {
+       for _, patch := range p.toPatch {
+               targetProg := p.labels[patch.label]
+               if targetProg == nil {
+                       p.errorf("undefined label %s", patch.label)
+               } else {
+                       p.branch(patch.prog, targetProg)
+               }
+       }
+}
+
+func (p *Parser) branch(jmp, target *obj.Prog) {
+       jmp.To = obj.Addr{
+               Type:  int16(p.arch.D_BRANCH),
+               Index: uint8(p.arch.D_NONE),
+       }
+       jmp.To.U.Branch = target
+}
+
+// asmInstruction assembles an instruction.
+// MOVW R9, (R10)
+func (p *Parser) asmInstruction(op int, a []addr.Addr) {
+       prog := &obj.Prog{
+               Ctxt:   p.linkCtxt,
+               Lineno: int32(p.histLineNum),
+               As:     int16(op),
+       }
+       switch len(a) {
+       case 0:
+               prog.From = p.arch.NoAddr
+               prog.To = p.arch.NoAddr
+       case 1:
+               if p.arch.UnaryDestination[op] {
+                       prog.From = p.arch.NoAddr
+                       prog.To = p.addrToAddr(&a[0])
+               } else {
+                       prog.From = p.addrToAddr(&a[0])
+                       prog.To = p.arch.NoAddr
+               }
+       case 2:
+               prog.From = p.addrToAddr(&a[0])
+               prog.To = p.addrToAddr(&a[1])
+               // DX:AX as a register pair can only appear on the RHS.
+               // Bizarrely, to obj it's specified by setting index on the LHS.
+               // TODO: can we fix this?
+               if a[1].Has(addr.Register2) {
+                       if int(prog.From.Index) != p.arch.D_NONE {
+                               p.errorf("register pair operand on RHS must have register on LHS")
+                       }
+                       prog.From.Index = uint8(a[1].Register2)
+               }
+       case 3:
+               // CMPSD etc.; third operand is imm8, stored in offset, or a register.
+               prog.From = p.addrToAddr(&a[0])
+               prog.To = p.addrToAddr(&a[1])
+               switch {
+               case a[2].Is(addr.Offset):
+                       prog.To.Offset = a[2].Offset
+               case a[2].Is(addr.Register):
+                       // Strange reodering.
+                       prog.To = p.addrToAddr(&a[2])
+                       prog.From = p.addrToAddr(&a[1])
+                       if !a[0].IsImmediateConstant {
+                               p.errorf("expected $value for 1st operand")
+                       }
+                       prog.To.Offset = a[0].Offset
+               default:
+                       p.errorf("expected offset or register for 3rd operand")
+               }
+       default:
+               p.errorf("can't handle instruction with %d operands", len(a))
+       }
+       p.link(prog, true)
+}
diff --git a/src/cmd/asm/internal/asm/overflow.go b/src/cmd/asm/internal/asm/overflow.go
new file mode 100644 (file)
index 0000000..9e03e7a
--- /dev/null
@@ -0,0 +1,94 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package asm
+
+/*
+       Tested with uint8s like this:
+
+       for a := 0; a <= 255; a++ {
+               for b := 0; b <= 127; b++ {
+                       ovfl := a+b != int(uint8(a)+uint8(b))
+                       if addOverflows(uint8(a), uint8(b)) != ovfl {
+                               fmt.Printf("%d+%d fails\n", a, b)
+                               break
+                       }
+               }
+       }
+       for a := 0; a <= 255; a++ {
+               for b := 0; b <= 127; b++ {
+                       ovfl := a-b != int(uint8(a)-uint8(b))
+                       if subOverflows(uint8(a), uint8(b)) != ovfl {
+                               fmt.Printf("%d-%d fails\n", a, b)
+                               break
+                       }
+               }
+       }
+       for a := 0; a <= 255; a++ {
+               for b := 0; b <= 255; b++ {
+                       ovfl := a*b != int(uint8(a)*uint8(b))
+                       if mulOverflows(uint8(a), uint8(b)) != ovfl {
+                               fmt.Printf("%d*%d fails\n", a, b)
+                       }
+               }
+       }
+*/
+
+func addOverflows(a, b uint64) bool {
+       return a+b < a
+}
+
+func subOverflows(a, b uint64) bool {
+       return a-b > a
+}
+
+func mulOverflows(a, b uint64) bool {
+       if a <= 1 || b <= 1 {
+               return false
+       }
+       c := a * b
+       return c/b != a
+}
+
+/*
+For the record, signed overflow:
+
+const mostNegative = -(mostPositive + 1)
+const mostPositive = 1<<63 - 1
+
+func signedAddOverflows(a, b int64) bool {
+       if (a >= 0) != (b >= 0) {
+               // Different signs cannot overflow.
+               return false
+       }
+       if a >= 0 {
+               // Both are positive.
+               return a+b < 0
+       }
+       return a+b >= 0
+}
+
+func signedSubOverflows(a, b int64) bool {
+       if (a >= 0) == (b >= 0) {
+               // Same signs cannot overflow.
+               return false
+       }
+       if a >= 0 {
+               // a positive, b negative.
+               return a-b < 0
+       }
+       return a-b >= 0
+}
+
+func signedMulOverflows(a, b int64) bool {
+       if a == 0 || b == 0 || a == 1 || b == 1 {
+               return false
+       }
+       if a == mostNegative || b == mostNegative {
+               return true
+       }
+       c := a * b
+       return c/b != a
+}
+*/
diff --git a/src/cmd/asm/internal/asm/parse.go b/src/cmd/asm/internal/asm/parse.go
new file mode 100644 (file)
index 0000000..377f2ac
--- /dev/null
@@ -0,0 +1,527 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package asm implements the parser and instruction generator for the assembler.
+// TODO: Split apart?
+package asm
+
+import (
+       "fmt"
+       "log"
+       "os"
+       "strconv"
+       "text/scanner"
+
+       "cmd/asm/internal/addr"
+       "cmd/asm/internal/arch"
+       "cmd/asm/internal/lex"
+       "cmd/internal/obj"
+)
+
+type Parser struct {
+       lex           lex.TokenReader
+       lineNum       int   // Line number in source file.
+       histLineNum   int   // Cumulative line number across source files.
+       errorLine     int   // (Cumulative) line number of last error.
+       errorCount    int   // Number of errors.
+       pc            int64 // virtual PC; count of Progs; doesn't advance for GLOBL or DATA.
+       input         []lex.Token
+       inputPos      int
+       pendingLabels []string // Labels to attach to next instruction.
+       labels        map[string]*obj.Prog
+       toPatch       []Patch
+       addr          []addr.Addr
+       arch          *arch.Arch
+       linkCtxt      *obj.Link
+       firstProg     *obj.Prog
+       lastProg      *obj.Prog
+       dataAddr      map[string]int64 // Most recent address for DATA for this symbol.
+}
+
+type Patch struct {
+       prog  *obj.Prog
+       label string
+}
+
+func NewParser(ctxt *obj.Link, ar *arch.Arch, lexer lex.TokenReader) *Parser {
+       return &Parser{
+               linkCtxt: ctxt,
+               arch:     ar,
+               lex:      lexer,
+               labels:   make(map[string]*obj.Prog),
+               dataAddr: make(map[string]int64),
+       }
+}
+
+func (p *Parser) errorf(format string, args ...interface{}) {
+       if p.histLineNum == p.errorLine {
+               // Only one error per line.
+               return
+       }
+       p.errorLine = p.histLineNum
+       // Put file and line information on head of message.
+       format = "%s:%d: " + format + "\n"
+       args = append([]interface{}{p.lex.File(), p.lineNum}, args...)
+       fmt.Fprintf(os.Stderr, format, args...)
+       p.errorCount++
+       if p.errorCount > 10 {
+               log.Fatal("too many errors")
+       }
+}
+
+func (p *Parser) Parse() (*obj.Prog, bool) {
+       for p.line() {
+       }
+       if p.errorCount > 0 {
+               return nil, false
+       }
+       p.patch()
+       return p.firstProg, true
+}
+
+// WORD op {, op} '\n'
+func (p *Parser) line() bool {
+       // Skip newlines.
+       var tok lex.ScanToken
+       for {
+               tok = p.lex.Next()
+               // We save the line number here so error messages from this instruction
+               // are labeled with this line. Otherwise we complain after we've absorbed
+               // the terminating newline and the line numbers are off by one in errors.
+               p.lineNum = p.lex.Line()
+               p.histLineNum = lex.HistLine()
+               switch tok {
+               case '\n':
+                       continue
+               case scanner.EOF:
+                       return false
+               }
+               break
+       }
+       // First item must be an identifier.
+       if tok != scanner.Ident {
+               p.errorf("expected identifier, found %q", p.lex.Text())
+               return false // Might as well stop now.
+       }
+       word := p.lex.Text()
+       operands := make([][]lex.Token, 0, 3)
+       // Zero or more comma-separated operands, one per loop.
+       first := true // Permit ':' to define this as a label.
+       for tok != '\n' && tok != ';' {
+               // Process one operand.
+               items := make([]lex.Token, 0, 3)
+               for {
+                       tok = p.lex.Next()
+                       if first {
+                               if tok == ':' {
+                                       p.pendingLabels = append(p.pendingLabels, word)
+                                       return true
+                               }
+                               first = false
+                       }
+                       if tok == scanner.EOF {
+                               p.errorf("unexpected EOF")
+                               return false
+                       }
+                       if tok == '\n' || tok == ';' || tok == ',' {
+                               break
+                       }
+                       items = append(items, lex.Make(tok, p.lex.Text()))
+               }
+               if len(items) > 0 {
+                       operands = append(operands, items)
+               } else if len(operands) > 0 {
+                       // Had a comma but nothing after.
+                       p.errorf("missing operand")
+               }
+       }
+       i := p.arch.Pseudos[word]
+       if i != 0 {
+               p.pseudo(i, word, operands)
+               return true
+       }
+       i = p.arch.Instructions[word]
+       if i != 0 {
+               p.instruction(i, word, operands)
+               return true
+       }
+       p.errorf("unrecognized instruction %s", word)
+       return true
+}
+
+func (p *Parser) instruction(op int, word string, operands [][]lex.Token) {
+       p.addr = p.addr[0:0]
+       for _, op := range operands {
+               p.addr = append(p.addr, p.address(op))
+       }
+       // Is it a jump? TODO
+       if word[0] == 'J' || word == "CALL" {
+               p.asmJump(op, p.addr)
+               return
+       }
+       p.asmInstruction(op, p.addr)
+}
+
+func (p *Parser) pseudo(op int, word string, operands [][]lex.Token) {
+       switch op {
+       case p.arch.ATEXT:
+               p.asmText(word, operands)
+       case p.arch.ADATA:
+               p.asmData(word, operands)
+       case p.arch.AGLOBL:
+               p.asmGlobl(word, operands)
+       case p.arch.APCDATA:
+               p.asmPCData(word, operands)
+       case p.arch.AFUNCDATA:
+               p.asmFuncData(word, operands)
+       default:
+               p.errorf("unimplemented: %s", word)
+       }
+}
+
+func (p *Parser) start(operand []lex.Token) {
+       p.input = operand
+       p.inputPos = 0
+}
+
+// address parses the operand into a link address structure.
+func (p *Parser) address(operand []lex.Token) addr.Addr {
+       p.start(operand)
+       addr := addr.Addr{}
+       p.operand(&addr)
+       return addr
+}
+
+// parse (R). The opening paren is known to be there.
+// The return value states whether it was a scaled mode.
+func (p *Parser) parenRegister(a *addr.Addr) bool {
+       p.next()
+       tok := p.next()
+       if tok.ScanToken != scanner.Ident {
+               p.errorf("expected register, got %s", tok)
+       }
+       r, present := p.arch.Registers[tok.String()]
+       if !present {
+               p.errorf("expected register, found %s", tok.String())
+       }
+       a.IsIndirect = true
+       scaled := p.peek() == '*'
+       if scaled {
+               // (R*2)
+               p.next()
+               tok := p.get(scanner.Int)
+               a.Scale = p.scale(tok.String())
+               a.Index = int16(r) // TODO: r should have type int16 but is uint8.
+       } else {
+               if a.HasRegister {
+                       p.errorf("multiple indirections")
+               }
+               a.HasRegister = true
+               a.Register = int16(r)
+       }
+       p.expect(')')
+       p.next()
+       return scaled
+}
+
+// scale converts a decimal string into a valid scale factor.
+func (p *Parser) scale(s string) int8 {
+       switch s {
+       case "1", "2", "4", "8":
+               return int8(s[0] - '0')
+       }
+       p.errorf("bad scale: %s", s)
+       return 0
+}
+
+// parse (R) or (R)(R*scale). The opening paren is known to be there.
+func (p *Parser) addressMode(a *addr.Addr) {
+       scaled := p.parenRegister(a)
+       if !scaled && p.peek() == '(' {
+               p.parenRegister(a)
+       }
+}
+
+// operand parses a general operand and stores the result in *a.
+func (p *Parser) operand(a *addr.Addr) bool {
+       if len(p.input) == 0 {
+               p.errorf("empty operand: cannot happen")
+               return false
+       }
+       switch p.peek() {
+       case '$':
+               p.next()
+               switch p.peek() {
+               case scanner.Ident:
+                       a.IsImmediateAddress = true
+                       p.operand(a) // TODO
+               case scanner.String:
+                       a.IsImmediateConstant = true
+                       a.HasString = true
+                       a.String = p.atos(p.next().String())
+               case scanner.Int, scanner.Float, '+', '-', '~', '(':
+                       a.IsImmediateConstant = true
+                       if p.have(scanner.Float) {
+                               a.HasFloat = true
+                               a.Float = p.floatExpr()
+                       } else {
+                               a.HasOffset = true
+                               a.Offset = int64(p.expr())
+                       }
+               default:
+                       p.errorf("illegal %s in immediate operand", p.next().String())
+               }
+       case '*':
+               p.next()
+               tok := p.next()
+               r, present := p.arch.Registers[tok.String()]
+               if !present {
+                       p.errorf("expected register; got %s", tok.String())
+               }
+               a.HasRegister = true
+               a.Register = int16(r)
+       case '(':
+               p.next()
+               if p.peek() == scanner.Ident {
+                       p.back()
+                       p.addressMode(a)
+                       break
+               }
+               p.back()
+               fallthrough
+       case '+', '-', '~', scanner.Int, scanner.Float:
+               if p.have(scanner.Float) {
+                       a.HasFloat = true
+                       a.Float = p.floatExpr()
+               } else {
+                       a.HasOffset = true
+                       a.Offset = int64(p.expr())
+               }
+               if p.peek() != scanner.EOF {
+                       p.expect('(')
+                       p.addressMode(a)
+               }
+       case scanner.Ident:
+               tok := p.next()
+               // Either R or (most general) ident<>+4(SB)(R*scale).
+               if r, present := p.arch.Registers[tok.String()]; present {
+                       a.HasRegister = true
+                       a.Register = int16(r)
+                       // Possibly register pair: DX:AX.
+                       if p.peek() == ':' {
+                               p.next()
+                               tok = p.get(scanner.Ident)
+                               a.HasRegister2 = true
+                               a.Register2 = int16(p.arch.Registers[tok.String()])
+                       }
+                       break
+               }
+               // Weirdness with statics: Might now have "<>".
+               if p.peek() == '<' {
+                       p.next()
+                       p.get('>')
+                       a.IsStatic = true
+               }
+               if p.peek() == '+' || p.peek() == '-' {
+                       a.HasOffset = true
+                       a.Offset = int64(p.expr())
+               }
+               a.Symbol = tok.String()
+               if p.peek() == scanner.EOF {
+                       break
+               }
+               // Expect (SB) or (FP)
+               p.expect('(')
+               p.parenRegister(a)
+               if a.Register != arch.RSB && a.Register != arch.RFP && a.Register != arch.RSP {
+                       p.errorf("expected SB, FP, or SP offset for %s", tok)
+               }
+               // Possibly have scaled register (CX*8).
+               if p.peek() != scanner.EOF {
+                       p.expect('(')
+                       p.addressMode(a)
+               }
+       default:
+               p.errorf("unexpected %s in operand", p.next())
+       }
+       p.expect(scanner.EOF)
+       return true
+}
+
+// expr = term | term '+' term
+func (p *Parser) expr() uint64 {
+       value := p.term()
+       for {
+               switch p.peek() {
+               case '+':
+                       p.next()
+                       x := p.term()
+                       if addOverflows(x, value) {
+                               p.errorf("overflow in %d+%d", value, x)
+                       }
+                       value += x
+               case '-':
+                       p.next()
+                       value -= p.term()
+               case '|':
+                       p.next()
+                       value |= p.term()
+               case '^':
+                       p.next()
+                       value ^= p.term()
+               default:
+                       return value
+               }
+       }
+}
+
+// floatExpr = fconst | '-' floatExpr | '+' floatExpr | '(' floatExpr ')'
+func (p *Parser) floatExpr() float64 {
+       tok := p.next()
+       switch tok.ScanToken {
+       case '(':
+               v := p.floatExpr()
+               if p.next().ScanToken != ')' {
+                       p.errorf("missing closing paren")
+               }
+               return v
+       case '+':
+               return +p.floatExpr()
+       case '-':
+               return -p.floatExpr()
+       case scanner.Float:
+               return p.atof(tok.String())
+       }
+       p.errorf("unexpected %s evaluating float expression", tok)
+       return 0
+}
+
+// term = const | term '*' term | '(' expr ')'
+func (p *Parser) term() uint64 {
+       tok := p.next()
+       switch tok.ScanToken {
+       case '(':
+               v := p.expr()
+               if p.next().ScanToken != ')' {
+                       p.errorf("missing closing paren")
+               }
+               return v
+       case '+':
+               return +p.term()
+       case '-':
+               return -p.term()
+       case '~':
+               return ^p.term()
+       case scanner.Int:
+               value := p.atoi(tok.String())
+               for {
+                       switch p.peek() {
+                       case '*':
+                               p.next()
+                               value *= p.term() // OVERFLOW?
+                       case '/':
+                               p.next()
+                               value /= p.term()
+                       case '%':
+                               p.next()
+                               value %= p.term()
+                       case lex.LSH:
+                               p.next()
+                               shift := p.term()
+                               if shift < 0 {
+                                       p.errorf("negative left shift %d", shift)
+                               }
+                               value <<= uint(shift)
+                       case lex.RSH:
+                               p.next()
+                               shift := p.term()
+                               if shift < 0 {
+                                       p.errorf("negative right shift %d", shift)
+                               }
+                               value >>= uint(shift)
+                       case '&':
+                               p.next()
+                               value &= p.term()
+                       default:
+                               return value
+                       }
+               }
+       }
+       p.errorf("unexpected %s evaluating expression", tok)
+       return 0
+}
+
+func (p *Parser) atoi(str string) uint64 {
+       value, err := strconv.ParseUint(str, 0, 64)
+       if err != nil {
+               p.errorf("%s", err)
+       }
+       return value
+}
+
+func (p *Parser) atof(str string) float64 {
+       value, err := strconv.ParseFloat(str, 64)
+       if err != nil {
+               p.errorf("%s", err)
+       }
+       return value
+}
+
+func (p *Parser) atos(str string) string {
+       value, err := strconv.Unquote(str)
+       if err != nil {
+               p.errorf("%s", err)
+       }
+       return value
+}
+
+// EOF represents the end of input.
+var EOF = lex.Make(scanner.EOF, "EOF")
+
+func (p *Parser) next() lex.Token {
+       if !p.more() {
+               return EOF
+       }
+       tok := p.input[p.inputPos]
+       p.inputPos++
+       return tok
+}
+
+func (p *Parser) back() {
+       p.inputPos--
+}
+
+func (p *Parser) peek() lex.ScanToken {
+       if p.more() {
+               return p.input[p.inputPos].ScanToken
+       }
+       return scanner.EOF
+}
+
+func (p *Parser) more() bool {
+       return p.inputPos < len(p.input)
+}
+
+// get verifies that the next item has the expected type and returns it.
+func (p *Parser) get(expected lex.ScanToken) lex.Token {
+       p.expect(expected)
+       return p.next()
+}
+
+// expect verifies that the next item has the expected type. It does not consume it.
+func (p *Parser) expect(expected lex.ScanToken) {
+       if p.peek() != expected {
+               p.errorf("expected %s, found %s", expected, p.next())
+       }
+}
+
+// have reports whether the remaining tokens contain the specified token.
+func (p *Parser) have(token lex.ScanToken) bool {
+       for i := p.inputPos; i < len(p.input); i++ {
+               if p.input[i].ScanToken == token {
+                       return true
+               }
+       }
+       return false
+}
diff --git a/src/cmd/asm/main.go b/src/cmd/asm/main.go
new file mode 100644 (file)
index 0000000..0835411
--- /dev/null
@@ -0,0 +1,63 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+       "flag"
+       "fmt"
+       "go/build"
+       "log"
+       "os"
+
+       "cmd/asm/internal/arch"
+       "cmd/asm/internal/asm"
+       "cmd/asm/internal/flags"
+       "cmd/asm/internal/lex"
+
+       "cmd/internal/obj"
+)
+
+func main() {
+       log.SetFlags(0)
+       log.SetPrefix("asm: ")
+
+       GOARCH := build.Default.GOARCH
+
+       architecture := arch.Set(GOARCH)
+       if architecture == nil {
+               log.Fatalf("asm: unrecognized architecture %s", GOARCH)
+       }
+
+       // Is this right?
+       flags.Parse(build.Default.GOROOT, build.Default.GOOS, GOARCH, architecture.Thechar)
+
+       // Create object file, write header.
+       fd, err := os.Create(*flags.OutputFile)
+       if err != nil {
+               log.Fatal(err)
+       }
+       ctxt := obj.Linknew(architecture.LinkArch)
+       if *flags.PrintOut {
+               ctxt.Debugasm = 1
+       }
+       ctxt.Bso = obj.Binitw(os.Stdout)
+       defer obj.Bflush(ctxt.Bso)
+       ctxt.Diag = log.Fatalf
+       output := obj.Binitw(fd)
+       fmt.Fprintf(output, "go object %s %s %s\n", obj.Getgoos(), obj.Getgoarch(), obj.Getgoversion())
+       fmt.Fprintf(output, "!\n")
+
+       lexer := lex.NewLexer(flag.Arg(0), ctxt)
+       parser := asm.NewParser(ctxt, architecture, lexer)
+       pList := obj.Linknewplist(ctxt)
+       var ok bool
+       pList.Firstpc, ok = parser.Parse()
+       if !ok {
+               log.Print("FAIL TODO")
+               os.Exit(1)
+       }
+       obj.Writeobjdirect(ctxt, output)
+       obj.Bflush(output)
+}