From: Rob Pike
Date: Thu, 22 Jan 2015 18:48:02 +0000 (-0800)
Subject: [dev.cc] cmd/asm: add lex internal package
X-Git-Tag: go1.5beta1~1915^2~97
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=86426395754da512cb8017169d1be75dd6aca817;p=gostls13.git

[dev.cc] cmd/asm: add lex internal package

Add the lexing code for the new portable assembler. It is internal
to the assembler, so lives in a subdirectory of cmd/asm/internal.

Its only new dependency is the flags package for the assembler, so
add that too; it's trivial. That package manages the command-line
flags in a central place.

The lexer builds on text/scanner to lex the input, including doing
a Plan 9-level implementation of the C preprocessor.

Change-Id: I262e8717b8c797010afaa5051920839906c0dd19
Reviewed-on: https://go-review.googlesource.com/3195
Reviewed-by: Russ Cox
---

diff --git a/src/cmd/asm/internal/flags/flags.go b/src/cmd/asm/internal/flags/flags.go
new file mode 100644
index 0000000000..61cd860cd1
--- /dev/null
+++ b/src/cmd/asm/internal/flags/flags.go
@@ -0,0 +1,70 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package flags implements top-level flags and the usage message for the assembler.
+package flags
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+var (
+	OutputFile = flag.String("o", "", "output file; default foo.6 for /a/b/c/foo.s on arm64 (unused TODO)")
+	PrintOut   = flag.Bool("S", true, "print assembly and machine code") // TODO: set to false
+	TrimPath   = flag.String("trimpath", "", "remove prefix from recorded source file paths (unused TODO)")
+)
+
+var (
+	D MultiFlag
+	I MultiFlag
+)
+
+func init() {
+	flag.Var(&D, "D", "predefined symbol with optional simple value -D=identifier=value; can be set multiple times")
+	flag.Var(&I, "I", "include directory; can be set multiple times")
+}
+
+// MultiFlag allows setting a value multiple times to collect a list, as in -I=dir1 -I=dir2.
+type MultiFlag []string
+
+func (m *MultiFlag) String() string {
+	return fmt.Sprint(*m)
+}
+
+func (m *MultiFlag) Set(val string) error {
+	(*m) = append(*m, val)
+	return nil
+}
+
+func Usage() {
+	fmt.Fprintf(os.Stderr, "usage: asm [options] file.s\n")
+	fmt.Fprintf(os.Stderr, "Flags:\n")
+	flag.PrintDefaults()
+	os.Exit(2)
+}
+
+func Parse(goroot, goos, goarch string, theChar int) { // TODO: see below
+	flag.Usage = Usage
+	flag.Parse()
+	if flag.NArg() != 1 {
+		flag.Usage()
+	}
+
+	// Flag refinement.
+	if *OutputFile == "" {
+		input := filepath.Base(flag.Arg(0))
+		if strings.HasSuffix(input, ".s") {
+			input = input[:len(input)-2]
+		}
+		*OutputFile = fmt.Sprintf("%s.%c", input, theChar)
+	}
+	// Initialize to include $GOROOT/pkg/$GOOS_GOARCH/ so we find textflag.h
+	// TODO: Delete last line once asm is installed because the go command takes care of this.
+	// The arguments to Parse can be simplified then too.
+	I = append(I, filepath.Join(goroot, "pkg", fmt.Sprintf("%s_%s", goos, goarch)))
+}
diff --git a/src/cmd/asm/internal/lex/input.go b/src/cmd/asm/internal/lex/input.go
new file mode 100644
index 0000000000..ae31998239
--- /dev/null
+++ b/src/cmd/asm/internal/lex/input.go
@@ -0,0 +1,414 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lex
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"text/scanner"
+
+	"cmd/asm/internal/flags"
+	"cmd/internal/obj"
+)
+
+// Input is the main input: a stack of readers and some macro definitions.
+// It also handles #include processing (by pushing onto the input stack)
+// and parses and instantiates macro definitions.
+type Input struct {
+	Stack
+	includes        []string
+	beginningOfLine bool
+	ifdefStack      []bool
+	macros          map[string]*Macro
+}
+
+// NewInput returns an Input from the given path.
+func NewInput(name string) *Input {
+	return &Input{
+		// include directories: look in source dir, then -I directories.
+		includes:        append([]string{filepath.Dir(name)}, flags.I...),
+		beginningOfLine: true,
+		macros:          predefine(flags.D),
+	}
+}
+
+// predefine installs the macros set by the -D flag on the command line.
+func predefine(defines flags.MultiFlag) map[string]*Macro {
+	macros := make(map[string]*Macro)
+	for _, name := range defines {
+		value := "1"
+		i := strings.IndexRune(name, '=')
+		if i > 0 {
+			name, value = name[:i], name[i+1:]
+		}
+		tokens := tokenize(name)
+		if len(tokens) != 1 || tokens[0].ScanToken != scanner.Ident {
+			fmt.Fprintf(os.Stderr, "asm: parsing -D: %q is not a valid identifier name\n", tokens[0])
+			flags.Usage()
+		}
+		macros[name] = &Macro{
+			name:   name,
+			args:   nil,
+			tokens: tokenize(value),
+		}
+	}
+	return macros
+}
+
+func (in *Input) Error(args ...interface{}) {
+	fmt.Fprintf(os.Stderr, "%s:%d: %s", in.File(), in.Line(), fmt.Sprintln(args...))
+	os.Exit(1)
+}
+
+// expectText is like Error but adds "got XXX" where XXX is a quoted representation of the most recent token.
+func (in *Input) expectText(args ...interface{}) {
+	in.Error(append(args, "; got", strconv.Quote(in.Text()))...)
+}
+
+// enabled reports whether the input is enabled by an ifdef, or is at the top level.
+func (in *Input) enabled() bool {
+	return len(in.ifdefStack) == 0 || in.ifdefStack[len(in.ifdefStack)-1]
+}
+
+func (in *Input) expectNewline(directive string) {
+	tok := in.Stack.Next()
+	if tok != '\n' {
+		in.expectText("expected newline after", directive)
+	}
+}
+
+func (in *Input) Next() ScanToken {
+	for {
+		tok := in.Stack.Next()
+		switch tok {
+		case '#':
+			if !in.beginningOfLine {
+				in.Error("'#' must be first item on line")
+			}
+			in.beginningOfLine = in.hash()
+		case scanner.Ident:
+			// Is it a macro name?
+			name := in.Stack.Text()
+			macro := in.macros[name]
+			if macro != nil {
+				in.invokeMacro(macro)
+				continue
+			}
+			fallthrough
+		default:
+			in.beginningOfLine = tok == '\n'
+			if in.enabled() {
+				return tok
+			}
+		}
+	}
+	in.Error("recursive macro invocation")
+	return 0
+}
+
+// hash processes a # preprocessor directive. It returns true iff it completes.
+func (in *Input) hash() bool {
+	// We have a '#'; it must be followed by a known word (define, include, etc.).
+	tok := in.Stack.Next()
+	if tok != scanner.Ident {
+		in.expectText("expected identifier after '#'")
+	}
+	if !in.enabled() {
+		// Can only start including again if we are at #else or #endif.
+		// We let #line through because it might affect errors.
+		switch in.Text() {
+		case "else", "endif", "line":
+			// Press on.
+		default:
+			return false
+		}
+	}
+	switch in.Text() {
+	case "define":
+		in.define()
+	case "else":
+		in.else_()
+	case "endif":
+		in.endif()
+	case "ifdef":
+		in.ifdef(true)
+	case "ifndef":
+		in.ifdef(false)
+	case "include":
+		in.include()
+	case "line":
+		in.line()
+	case "undef":
+		in.undef()
+	default:
+		in.Error("unexpected identifier after '#':", in.Text())
+	}
+	return true
+}
+
+// macroName returns the name for the macro being referenced.
+func (in *Input) macroName() string {
+	// We use the Stack's input method; no macro processing at this stage.
+	tok := in.Stack.Next()
+	if tok != scanner.Ident {
+		in.expectText("expected identifier after # directive")
+	}
+	// Name is alphanumeric by definition.
+	return in.Text()
+}
+
+// #define processing.
+func (in *Input) define() {
+	name := in.macroName()
+	args, tokens := in.macroDefinition(name)
+	in.defineMacro(name, args, tokens)
+}
+
+// defineMacro stores the macro definition in the Input.
+func (in *Input) defineMacro(name string, args []string, tokens []Token) {
+	if in.macros[name] != nil {
+		in.Error("redefinition of macro:", name)
+	}
+	in.macros[name] = &Macro{
+		name:   name,
+		args:   args,
+		tokens: tokens,
+	}
+}
+
+// macroDefinition returns the list of formals and the tokens of the definition.
+// The argument list is nil for no parens on the definition; otherwise a list of
+// formal argument names.
+func (in *Input) macroDefinition(name string) ([]string, []Token) {
+	tok := in.Stack.Next()
+	if tok == '\n' || tok == scanner.EOF {
+		in.Error("no definition for macro:", name)
+	}
+	var args []string
+	if tok == '(' {
+		// Macro has arguments. Scan list of formals.
+		acceptArg := true
+		args = []string{} // Zero length but not nil.
+	Loop:
+		for {
+			tok = in.Stack.Next()
+			switch tok {
+			case ')':
+				tok = in.Stack.Next() // First token of macro definition.
+				break Loop
+			case ',':
+				if acceptArg {
+					in.Error("bad syntax in definition for macro:", name)
+				}
+				acceptArg = true
+			case scanner.Ident:
+				if !acceptArg {
+					in.Error("bad syntax in definition for macro:", name)
+				}
+				arg := in.Stack.Text()
+				if i := lookup(args, arg); i >= 0 {
+					in.Error("duplicate argument", arg, "in definition for macro:", name)
+				}
+				args = append(args, arg)
+				acceptArg = false
+			default:
+				in.Error("bad definition for macro:", name)
+			}
+		}
+	}
+	var tokens []Token
+	// Scan to newline. Backslashes escape newlines.
+	for tok != '\n' {
+		if tok == '\\' {
+			tok = in.Stack.Next()
+			if tok != '\n' && tok != '\\' {
+				in.Error(`can only escape \ or \n in definition for macro:`, name)
+			}
+			if tok == '\n' { // backslash-newline is discarded
+				tok = in.Stack.Next()
+				continue
+			}
+		}
+		tokens = append(tokens, Token{ScanToken(tok), in.Text()})
+		tok = in.Stack.Next()
+	}
+	return args, tokens
+}
+
+func lookup(args []string, arg string) int {
+	for i, a := range args {
+		if a == arg {
+			return i
+		}
+	}
+	return -1
+}
+
+// invokeMacro pushes onto the input Stack a Slice that holds the macro definition with the actual
+// parameters substituted for the formals.
+// Invoking a macro does not touch the PC/line history.
+func (in *Input) invokeMacro(macro *Macro) {
+	actuals := in.argsFor(macro)
+	var tokens []Token
+	for _, tok := range macro.tokens {
+		if tok.ScanToken != scanner.Ident {
+			tokens = append(tokens, tok)
+			continue
+		}
+		substitution := actuals[tok.text]
+		if substitution == nil {
+			tokens = append(tokens, tok)
+			continue
+		}
+		tokens = append(tokens, substitution...)
+	}
+	in.Push(NewSlice(in.File(), in.Line(), tokens))
+}
+
+// argsFor returns a map from formal name to actual value for this macro invocation.
+func (in *Input) argsFor(macro *Macro) map[string][]Token {
+	if macro.args == nil {
+		return nil
+	}
+	tok := in.Stack.Next()
+	if tok != '(' {
+		in.Error("missing arguments for invocation of macro:", macro.name)
+	}
+	var tokens []Token
+	args := make(map[string][]Token)
+	argNum := 0
+	for {
+		tok = in.Stack.Next()
+		switch tok {
+		case scanner.EOF, '\n':
+			in.Error("unterminated arg list invoking macro:", macro.name)
+		case ',', ')':
+			if argNum >= len(macro.args) {
+				in.Error("too many arguments for macro:", macro.name)
+			}
+			if len(macro.args) == 0 && argNum == 0 && len(tokens) == 0 {
+				// Zero-argument macro invoked with no arguments.
+				return args
+			}
+			args[macro.args[argNum]] = tokens
+			tokens = nil
+			argNum++
+			if tok == ')' {
+				if argNum != len(macro.args) {
+					in.Error("too few arguments for macro:", macro.name)
+				}
+				return args
+			}
+		default:
+			tokens = append(tokens, Token{tok, in.Stack.Text()})
+		}
+	}
+}
+
+// #ifdef and #ifndef processing.
+func (in *Input) ifdef(truth bool) {
+	name := in.macroName()
+	in.expectNewline("#if[n]def")
+	if _, defined := in.macros[name]; !defined {
+		truth = !truth
+	}
+	in.ifdefStack = append(in.ifdefStack, truth)
+}
+
+// #else processing
+func (in *Input) else_() {
+	in.expectNewline("#else")
+	if len(in.ifdefStack) == 0 {
+		in.Error("unmatched #else")
+	}
+	in.ifdefStack[len(in.ifdefStack)-1] = !in.ifdefStack[len(in.ifdefStack)-1]
+}
+
+// #endif processing.
+func (in *Input) endif() {
+	in.expectNewline("#endif")
+	if len(in.ifdefStack) == 0 {
+		in.Error("unmatched #endif")
+	}
+	in.ifdefStack = in.ifdefStack[:len(in.ifdefStack)-1]
+}
+
+// #include processing.
+func (in *Input) include() {
+	// Find and parse string.
+	tok := in.Stack.Next()
+	if tok != scanner.String {
+		in.expectText("expected string after #include")
+	}
+	name, err := strconv.Unquote(in.Text())
+	if err != nil {
+		in.Error("unquoting include file name: ", err)
+	}
+	in.expectNewline("#include")
+	// Push tokenizer for file onto stack.
+	fd, err := os.Open(name)
+	if err != nil {
+		for _, dir := range in.includes {
+			fd, err = os.Open(filepath.Join(dir, name))
+			if err == nil {
+				break
+			}
+		}
+		if err != nil {
+			in.Error("#include:", err)
+		}
+	}
+	in.Push(NewTokenizer(name, fd, fd))
+}
+
+// #line processing.
+func (in *Input) line() {
+	// Only need to handle Plan 9 format: #line 337 "filename"
+	tok := in.Stack.Next()
+	if tok != scanner.Int {
+		in.expectText("expected line number after #line")
+	}
+	line, err := strconv.Atoi(in.Stack.Text())
+	if err != nil {
+		in.Error("error parsing #line (cannot happen):", err)
+	}
+	tok = in.Stack.Next()
+	if tok != scanner.String {
+		in.expectText("expected file name in #line")
+	}
+	file, err := strconv.Unquote(in.Stack.Text())
+	if err != nil {
+		in.Error("unquoting #line file name: ", err)
+	}
+	obj.Linklinehist(linkCtxt, histLine, file, line)
+	in.Stack.SetPos(line, file)
+}
+
+// #undef processing
+func (in *Input) undef() {
+	name := in.macroName()
+	if in.macros[name] == nil {
+		in.Error("#undef for undefined macro:", name)
+	}
+	// Newline must be next.
+	tok := in.Stack.Next()
+	if tok != '\n' {
+		in.Error("syntax error in #undef for macro:", name)
+	}
+	delete(in.macros, name)
+}
+
+func (in *Input) Push(r TokenReader) {
+	if len(in.tr) > 100 {
+		in.Error("input recursion")
+	}
+	in.Stack.Push(r)
+}
+
+func (in *Input) Close() {
+}
diff --git a/src/cmd/asm/internal/lex/lex.go b/src/cmd/asm/internal/lex/lex.go
new file mode 100644
index 0000000000..2153591e31
--- /dev/null
+++ b/src/cmd/asm/internal/lex/lex.go
@@ -0,0 +1,136 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package lex implements lexical analysis for the assembler.
+package lex
+
+import (
+	"fmt"
+	"log"
+	"os"
+	"strings"
+	"text/scanner"
+
+	"cmd/internal/obj"
+)
+
+// A ScanToken represents an input item. It is a simple wrapping of rune, as
+// returned by text/scanner.Scanner, plus a couple of extra values.
+type ScanToken rune
+
+const (
+	// Asm defines some two-character lexemes. We make up
+	// a rune/ScanToken value for them - ugly but simple.
+	LSH ScanToken = -1000 - iota // << Left shift.
+	RSH                          // >> Logical right shift.
+	ARR                          // -> Used on ARM for shift type 3, arithmetic right shift.
+	ROT                          // @> Used on ARM for shift type 4, rotate right.
+)
+
+func (t ScanToken) String() string {
+	switch t {
+	case scanner.EOF:
+		return "EOF"
+	case scanner.Ident:
+		return "identifier"
+	case scanner.Int:
+		return "integer constant"
+	case scanner.Float:
+		return "float constant"
+	case scanner.Char:
+		return "rune constant"
+	case scanner.String:
+		return "string constant"
+	case scanner.RawString:
+		return "raw string constant"
+	case scanner.Comment:
+		return "comment"
+	default:
+		return fmt.Sprintf("%q", rune(t))
+	}
+}
+
+var (
+	// It might be nice if these weren't global.
+	linkCtxt *obj.Link // The link context for all instructions.
+	histLine int       = 1 // The cumulative count of lines processed.
+)
+
+// HistLine reports the cumulative source line number of the token,
+// for use in the Prog structure for the linker. (It's always handling the
+// instruction from the current lex line.)
+func HistLine() int {
+	return histLine
+}
+
+// NewLexer returns a lexer for the named file and the given link context.
+func NewLexer(name string, ctxt *obj.Link) TokenReader {
+	linkCtxt = ctxt
+	input := NewInput(name)
+	fd, err := os.Open(name)
+	if err != nil {
+		log.Fatalf("asm: %s\n", err)
+	}
+	input.Push(NewTokenizer(name, fd, fd))
+	return input
+}
+
+// The other files in this directory each contain an implementation of TokenReader.
+
+// A TokenReader is like a reader, but returns lex tokens of type Token. It also can tell you what
+// the text of the most recently returned token is, and where it was found.
+// The underlying scanner elides all spaces except newline, so the input looks like a stream of
+// Tokens; original spacing is lost but we don't need it.
+type TokenReader interface {
+	// Next returns the next token.
+	Next() ScanToken
+	// The following methods all refer to the most recent token returned by Next.
+	// Text returns the original string representation of the token.
+	Text() string
+	// File reports the source file name of the token.
+	File() string
+	// Line reports the source line number of the token.
+	Line() int
+	// SetPos sets the file and line number.
+	SetPos(line int, file string)
+	// Close does any teardown required.
+	Close()
+}
+
+// A Token is a scan token plus its string value.
+// A macro is stored as a sequence of Tokens with spaces stripped.
+type Token struct {
+	ScanToken
+	text string
+}
+
+// Make returns a Token with the given rune (ScanToken) and text representation.
+func Make(token ScanToken, text string) Token {
+	return Token{ScanToken: token, text: text}
+}
+
+func (l Token) String() string {
+	return l.text
+}
+
+// A Macro represents the definition of a #defined macro.
+type Macro struct {
+	name   string   // The #define name.
+	args   []string // Formal arguments.
+	tokens []Token  // Body of macro.
+}
+
+// tokenize turns a string into a list of Tokens; used to parse the -D flag.
+func tokenize(str string) []Token {
+	t := NewTokenizer("command line", strings.NewReader(str), nil)
+	var tokens []Token
+	for {
+		tok := t.Next()
+		if tok == scanner.EOF {
+			break
+		}
+		tokens = append(tokens, Token{ScanToken: tok, text: t.Text()})
+	}
+	return tokens
+}
diff --git a/src/cmd/asm/internal/lex/slice.go b/src/cmd/asm/internal/lex/slice.go
new file mode 100644
index 0000000000..6ac72f469e
--- /dev/null
+++ b/src/cmd/asm/internal/lex/slice.go
@@ -0,0 +1,54 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lex
+
+import "text/scanner"
+
+// A Slice reads from a slice of Tokens.
+type Slice struct {
+	tokens   []Token
+	fileName string
+	line     int
+	pos      int
+}
+
+func NewSlice(fileName string, line int, tokens []Token) *Slice {
+	return &Slice{
+		tokens:   tokens,
+		fileName: fileName,
+		line:     line,
+		pos:      -1, // Next will advance to zero.
+	}
+}
+
+func (s *Slice) Next() ScanToken {
+	s.pos++
+	if s.pos >= len(s.tokens) {
+		return scanner.EOF
+	}
+	return s.tokens[s.pos].ScanToken
+}
+
+func (s *Slice) Text() string {
+	return s.tokens[s.pos].text
+}
+
+func (s *Slice) File() string {
+	return s.fileName
+}
+
+func (s *Slice) Line() int {
+	return s.line
+}
+
+func (s *Slice) SetPos(line int, file string) {
+	// Cannot happen because we only have slices of already-scanned
+	// text, but be prepared.
+	s.line = line
+	s.fileName = file
+}
+
+func (s *Slice) Close() {
+}
diff --git a/src/cmd/asm/internal/lex/stack.go b/src/cmd/asm/internal/lex/stack.go
new file mode 100644
index 0000000000..acd44012bf
--- /dev/null
+++ b/src/cmd/asm/internal/lex/stack.go
@@ -0,0 +1,56 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lex
+
+import "text/scanner"
+
+// A Stack is a stack of TokenReaders. As the top TokenReader hits EOF,
+// it resumes reading the next one down.
+type Stack struct {
+	tr []TokenReader
+}
+
+// Push adds tr to the top (end) of the input stack. (Popping happens automatically.)
+func (s *Stack) Push(tr TokenReader) {
+	s.tr = append(s.tr, tr)
+}
+
+func (s *Stack) Next() ScanToken {
+	tos := s.tr[len(s.tr)-1]
+	tok := tos.Next()
+	for tok == scanner.EOF && len(s.tr) > 1 {
+		tos.Close()
+		/*
+			// If it's not a macro (a Slice at this point), pop the line history stack and close the file descriptor.
+			if _, isMacro := tos.(*Slice); !isMacro {
+				// TODO: close file descriptor.
+				obj.Linklinehist(linkCtxt, histLine, "", 0)
+			}
+		*/
+		// Pop the topmost item from the stack and resume with the next one down.
+		s.tr = s.tr[:len(s.tr)-1]
+		tok = s.Next()
+	}
+	return tok
+}
+
+func (s *Stack) Text() string {
+	return s.tr[len(s.tr)-1].Text()
+}
+
+func (s *Stack) File() string {
+	return s.tr[len(s.tr)-1].File()
+}
+
+func (s *Stack) Line() int {
+	return s.tr[len(s.tr)-1].Line()
+}
+
+func (s *Stack) SetPos(line int, file string) {
+	s.tr[len(s.tr)-1].SetPos(line, file)
+}
+
+func (s *Stack) Close() { // Unused.
+}
diff --git a/src/cmd/asm/internal/lex/tokenizer.go b/src/cmd/asm/internal/lex/tokenizer.go
new file mode 100644
index 0000000000..6a6fdbc776
--- /dev/null
+++ b/src/cmd/asm/internal/lex/tokenizer.go
@@ -0,0 +1,146 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lex
+
+import (
+	"io"
+	"os"
+	"strings"
+	"text/scanner"
+	"unicode"
+
+	"cmd/internal/obj"
+)
+
+// A Tokenizer is a simple wrapping of text/scanner.Scanner, configured
+// for our purposes and made a TokenReader. It forms the lowest level,
+// turning text from readers into tokens.
+type Tokenizer struct {
+	tok      ScanToken
+	s        *scanner.Scanner
+	line     int
+	fileName string
+	file     *os.File // If non-nil, file descriptor to close.
+}
+
+func NewTokenizer(name string, r io.Reader, file *os.File) *Tokenizer {
+	var s scanner.Scanner
+	s.Init(r)
+	// Newline is like a semicolon; other space characters are fine.
+	s.Whitespace = 1<<'\t' | 1<<'\r' | 1<<' '
+	// Don't skip comments: we need to count newlines.
+	s.Mode = scanner.ScanChars |
+		scanner.ScanFloats |
+		scanner.ScanIdents |
+		scanner.ScanInts |
+		scanner.ScanStrings |
+		scanner.ScanComments
+	s.Position.Filename = name
+	s.IsIdentRune = isIdentRune
+	obj.Linklinehist(linkCtxt, histLine, name, 0)
+	return &Tokenizer{
+		s:        &s,
+		line:     1,
+		fileName: name,
+		file:     file,
+	}
+}
+
+// We want center dot (·) and division slash (∕) to work as identifier characters.
+func isIdentRune(ch rune, i int) bool {
+	if unicode.IsLetter(ch) {
+		return true
+	}
+	switch ch {
+	case '_': // Underscore; traditional.
+		return true
+	case '\u00B7': // Represents the period in runtime.exit. U+00B7 '·' middle dot
+		return true
+	case '\u2215': // Represents the slash in runtime/debug.setGCPercent. U+2215 '∕' division slash
+		return true
+	}
+	// Digits are OK only after the first character.
+	return i > 0 && unicode.IsDigit(ch)
+}
+
+func (t *Tokenizer) Text() string {
+	switch t.tok {
+	case LSH:
+		return "<<"
+	case RSH:
+		return ">>"
+	case ARR:
+		return "->"
+	case ROT:
+		return "@>"
+	}
+	return t.s.TokenText()
+}
+
+func (t *Tokenizer) File() string {
+	return t.fileName
+}
+
+func (t *Tokenizer) Line() int {
+	return t.line
+}
+
+func (t *Tokenizer) SetPos(line int, file string) {
+	t.line = line
+	t.fileName = file
+}
+
+func (t *Tokenizer) Next() ScanToken {
+	s := t.s
+	for {
+		t.tok = ScanToken(s.Scan())
+		if t.tok != scanner.Comment {
+			break
+		}
+		length := strings.Count(s.TokenText(), "\n")
+		t.line += length
+		histLine += length
+		// TODO: If we ever have //go: comments in assembly, will need to keep them here.
+		// For now, just discard all comments.
+	}
+	switch t.tok {
+	case '\n':
+		histLine++
+		t.line++
+	case '-':
+		if s.Peek() == '>' {
+			s.Next()
+			t.tok = ARR
+			return ARR
+		}
+	case '@':
+		if s.Peek() == '>' {
+			s.Next()
+			t.tok = ROT
+			return ROT
+		}
+	case '<':
+		if s.Peek() == '<' {
+			s.Next()
+			t.tok = LSH
+			return LSH
+		}
+	case '>':
+		if s.Peek() == '>' {
+			s.Next()
+			t.tok = RSH
+			return RSH
+		}
+	}
+	return t.tok
+}
+
+func (t *Tokenizer) Close() {
+	if t.file != nil {
+		t.file.Close()
+		// It's an open file, so pop the line history.
+		obj.Linklinehist(linkCtxt, histLine, "", 0)
+	}
+}
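
Editor's note (not part of the commit): the package's entry point is NewLexer,
which wraps a Tokenizer in an Input and returns it as a TokenReader yielding
fully preprocessed tokens (after #include, #ifdef, and macro expansion). A
minimal driver sketch follows; it uses only the APIs defined in this change,
but the *obj.Link context must come from the assembler's architecture setup
elsewhere in cmd/asm, so the function below and its caller are assumptions,
not code from this commit.

	// dumpTokens prints each token the lexer produces for one source
	// file, with its position, token class, and text. The ctxt argument
	// is assumed to be a link context built by the surrounding assembler.
	func dumpTokens(name string, ctxt *obj.Link) {
		input := lex.NewLexer(name, ctxt) // pushes a Tokenizer for the file
		defer input.Close()
		for {
			tok := input.Next()
			if tok == scanner.EOF {
				return
			}
			// tok.String() names the token class ("identifier",
			// "integer constant", ...); Text() is its original spelling.
			fmt.Printf("%s:%d:\t%s\t%q\n", input.File(), input.Line(), tok, input.Text())
		}
	}

Because lex lives under cmd/asm/internal, such a driver can only be compiled
from within cmd/asm itself; the sketch is illustrative, not a standalone
program.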