From: Robert Griesemer Date: Wed, 5 Oct 2011 17:34:01 +0000 (-0700) Subject: ebnf: use scanner instead of go/scanner X-Git-Tag: weekly.2011-10-06~24 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=0da66a2e902ad36729d3facc887af3144a7d8336;p=gostls13.git ebnf: use scanner instead of go/scanner R=rsc, r CC=golang-dev https://golang.org/cl/5192043 --- diff --git a/src/cmd/ebnflint/ebnflint.go b/src/cmd/ebnflint/ebnflint.go index 009b336f39..6f307b7060 100644 --- a/src/cmd/ebnflint/ebnflint.go +++ b/src/cmd/ebnflint/ebnflint.go @@ -98,12 +98,12 @@ func main() { src = extractEBNF(src) } - grammar, err := ebnf.Parse(fset, filename, src) + grammar, err := ebnf.Parse(filename, bytes.NewBuffer(src)) if err != nil { report(err) } - if err = ebnf.Verify(fset, grammar, *start); err != nil { + if err = ebnf.Verify(grammar, *start); err != nil { report(err) } } diff --git a/src/pkg/ebnf/ebnf.go b/src/pkg/ebnf/ebnf.go index 69da117672..2ec7f00800 100644 --- a/src/pkg/ebnf/ebnf.go +++ b/src/pkg/ebnf/ebnf.go @@ -23,13 +23,39 @@ package ebnf import ( - "go/scanner" - "go/token" + "fmt" "os" + "scanner" "unicode" "utf8" ) +// ---------------------------------------------------------------------------- +// Error handling + +type errorList []os.Error + +func (list errorList) Error() os.Error { + if len(list) == 0 { + return nil + } + return list +} + +func (list errorList) String() string { + switch len(list) { + case 0: + return "no errors" + case 1: + return list[0].String() + } + return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1) +} + +func newError(pos scanner.Position, msg string) os.Error { + return os.NewError(fmt.Sprintf("%s: %s", pos, msg)) +} + // ---------------------------------------------------------------------------- // Internal representation @@ -37,7 +63,7 @@ type ( // An Expression node represents a production expression. Expression interface { // Pos is the position of the first character of the syntactic construct - Pos() token.Pos + Pos() scanner.Position } // An Alternative node represents a non-empty list of alternative expressions. @@ -48,13 +74,13 @@ type ( // A Name node represents a production name. Name struct { - StringPos token.Pos + StringPos scanner.Position String string } // A Token node represents a literal. Token struct { - StringPos token.Pos + StringPos scanner.Position String string } @@ -65,50 +91,50 @@ type ( // A Group node represents a grouped expression. Group struct { - Lparen token.Pos + Lparen scanner.Position Body Expression // (body) } // An Option node represents an optional expression. Option struct { - Lbrack token.Pos + Lbrack scanner.Position Body Expression // [body] } // A Repetition node represents a repeated expression. Repetition struct { - Lbrace token.Pos + Lbrace scanner.Position Body Expression // {body} } - // A Bad node stands for pieces of source code that lead to a parse error. - Bad struct { - TokPos token.Pos - Error string // parser error message - } - // A Production node represents an EBNF production. Production struct { Name *Name Expr Expression } + // A Bad node stands for pieces of source code that lead to a parse error. + Bad struct { + TokPos scanner.Position + Error string // parser error message + } + // A Grammar is a set of EBNF productions. The map // is indexed by production name. // Grammar map[string]*Production ) -func (x Alternative) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Alternative -func (x Sequence) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Sequences -func (x *Name) Pos() token.Pos { return x.StringPos } -func (x *Token) Pos() token.Pos { return x.StringPos } -func (x *Range) Pos() token.Pos { return x.Begin.Pos() } -func (x *Group) Pos() token.Pos { return x.Lparen } -func (x *Option) Pos() token.Pos { return x.Lbrack } -func (x *Repetition) Pos() token.Pos { return x.Lbrace } -func (x *Bad) Pos() token.Pos { return x.TokPos } -func (x *Production) Pos() token.Pos { return x.Name.Pos() } +func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative +func (x Sequence) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Sequences +func (x *Name) Pos() scanner.Position { return x.StringPos } +func (x *Token) Pos() scanner.Position { return x.StringPos } +func (x *Range) Pos() scanner.Position { return x.Begin.Pos() } +func (x *Group) Pos() scanner.Position { return x.Lparen } +func (x *Option) Pos() scanner.Position { return x.Lbrack } +func (x *Repetition) Pos() scanner.Position { return x.Lbrace } +func (x *Production) Pos() scanner.Position { return x.Name.Pos() } +func (x *Bad) Pos() scanner.Position { return x.TokPos } // ---------------------------------------------------------------------------- // Grammar verification @@ -119,15 +145,14 @@ func isLexical(name string) bool { } type verifier struct { - fset *token.FileSet - scanner.ErrorVector + errors errorList worklist []*Production reached Grammar // set of productions reached from (and including) the root production grammar Grammar } -func (v *verifier) error(pos token.Pos, msg string) { - v.Error(v.fset.Position(pos), msg) +func (v *verifier) error(pos scanner.Position, msg string) { + v.errors = append(v.errors, newError(pos, msg)) } func (v *verifier) push(prod *Production) { @@ -187,24 +212,23 @@ func (v *verifier) verifyExpr(expr Expression, lexical bool) { v.verifyExpr(x.Body, lexical) case *Repetition: v.verifyExpr(x.Body, lexical) + case *Bad: + v.error(x.Pos(), x.Error) default: - panic("unreachable") + panic(fmt.Sprintf("internal error: unexpected type %T", expr)) } } -func (v *verifier) verify(fset *token.FileSet, grammar Grammar, start string) { +func (v *verifier) verify(grammar Grammar, start string) { // find root production root, found := grammar[start] if !found { - // token.NoPos doesn't require a file set; - // ok to set v.fset only afterwards - v.error(token.NoPos, "no start production "+start) + var noPos scanner.Position + v.error(noPos, "no start production "+start) return } // initialize verifier - v.fset = fset - v.ErrorVector.Reset() v.worklist = v.worklist[0:0] v.reached = make(Grammar) v.grammar = grammar @@ -238,8 +262,8 @@ func (v *verifier) verify(fset *token.FileSet, grammar Grammar, start string) { // // Position information is interpreted relative to the file set fset. // -func Verify(fset *token.FileSet, grammar Grammar, start string) os.Error { +func Verify(grammar Grammar, start string) os.Error { var v verifier - v.verify(fset, grammar, start) - return v.GetError(scanner.Sorted) + v.verify(grammar, start) + return v.errors.Error() } diff --git a/src/pkg/ebnf/ebnf_test.go b/src/pkg/ebnf/ebnf_test.go index b086facc3e..8cfd6b9c37 100644 --- a/src/pkg/ebnf/ebnf_test.go +++ b/src/pkg/ebnf/ebnf_test.go @@ -5,13 +5,10 @@ package ebnf import ( - "go/token" - "io/ioutil" + "bytes" "testing" ) -var fset = token.NewFileSet() - var goodGrammars = []string{ `Program = .`, @@ -46,18 +43,19 @@ var badGrammars = []string{ `Program = {} .`, } -func checkGood(t *testing.T, filename string, src []byte) { - grammar, err := Parse(fset, filename, src) +func checkGood(t *testing.T, src string) { + grammar, err := Parse("", bytes.NewBuffer([]byte(src))) if err != nil { t.Errorf("Parse(%s) failed: %v", src, err) + return } - if err = Verify(fset, grammar, "Program"); err != nil { + if err = Verify(grammar, "Program"); err != nil { t.Errorf("Verify(%s) failed: %v", src, err) } } -func checkBad(t *testing.T, filename string, src []byte) { - _, err := Parse(fset, filename, src) +func checkBad(t *testing.T, src string) { + _, err := Parse("", bytes.NewBuffer([]byte(src))) if err == nil { t.Errorf("Parse(%s) should have failed", src) } @@ -65,23 +63,9 @@ func checkBad(t *testing.T, filename string, src []byte) { func TestGrammars(t *testing.T) { for _, src := range goodGrammars { - checkGood(t, "", []byte(src)) + checkGood(t, src) } for _, src := range badGrammars { - checkBad(t, "", []byte(src)) - } -} - -var files = []string{ -// TODO(gri) add some test files -} - -func TestFiles(t *testing.T) { - for _, filename := range files { - src, err := ioutil.ReadFile(filename) - if err != nil { - t.Fatal(err) - } - checkGood(t, filename, src) + checkBad(t, src) } } diff --git a/src/pkg/ebnf/parser.go b/src/pkg/ebnf/parser.go index ef2fac0000..2dbbefb751 100644 --- a/src/pkg/ebnf/parser.go +++ b/src/pkg/ebnf/parser.go @@ -5,51 +5,47 @@ package ebnf import ( - "go/scanner" - "go/token" + "io" "os" + "scanner" "strconv" ) type parser struct { - fset *token.FileSet - scanner.ErrorVector + errors errorList scanner scanner.Scanner - pos token.Pos // token position - tok token.Token // one token look-ahead - lit string // token literal + pos scanner.Position // token position + tok int // one token look-ahead + lit string // token literal } func (p *parser) next() { - p.pos, p.tok, p.lit = p.scanner.Scan() - if p.tok.IsKeyword() { - // TODO Should keyword mapping always happen outside scanner? - // Or should there be a flag to scanner to enable keyword mapping? - p.tok = token.IDENT - } + p.tok = p.scanner.Scan() + p.pos = p.scanner.Position + p.lit = p.scanner.TokenText() } -func (p *parser) error(pos token.Pos, msg string) { - p.Error(p.fset.Position(pos), msg) +func (p *parser) error(pos scanner.Position, msg string) { + p.errors = append(p.errors, newError(pos, msg)) } -func (p *parser) errorExpected(pos token.Pos, msg string) { - msg = "expected " + msg - if pos == p.pos { +func (p *parser) errorExpected(pos scanner.Position, msg string) { + msg = `expected "` + msg + `"` + if pos.Offset == p.pos.Offset { // the error happened at the current position; // make the error message more specific - msg += ", found '" + p.tok.String() + "'" - if p.tok.IsLiteral() { + msg += ", found " + scanner.TokenString(p.tok) + if p.tok < 0 { msg += " " + p.lit } } p.error(pos, msg) } -func (p *parser) expect(tok token.Token) token.Pos { +func (p *parser) expect(tok int) scanner.Position { pos := p.pos if p.tok != tok { - p.errorExpected(pos, "'"+tok.String()+"'") + p.errorExpected(pos, scanner.TokenString(tok)) } p.next() // make progress in any case return pos @@ -58,21 +54,21 @@ func (p *parser) expect(tok token.Token) token.Pos { func (p *parser) parseIdentifier() *Name { pos := p.pos name := p.lit - p.expect(token.IDENT) + p.expect(scanner.Ident) return &Name{pos, name} } func (p *parser) parseToken() *Token { pos := p.pos value := "" - if p.tok == token.STRING { + if p.tok == scanner.String { value, _ = strconv.Unquote(p.lit) // Unquote may fail with an error, but only if the scanner found // an illegal string in the first place. In this case the error // has already been reported. p.next() } else { - p.expect(token.STRING) + p.expect(scanner.String) } return &Token{pos, value} } @@ -82,32 +78,32 @@ func (p *parser) parseTerm() (x Expression) { pos := p.pos switch p.tok { - case token.IDENT: + case scanner.Ident: x = p.parseIdentifier() - case token.STRING: + case scanner.String: tok := p.parseToken() x = tok - const ellipsis = "…" // U+2026, the horizontal ellipsis character - if p.tok == token.ILLEGAL && p.lit == ellipsis { + const ellipsis = '…' // U+2026, the horizontal ellipsis character + if p.tok == ellipsis { p.next() x = &Range{tok, p.parseToken()} } - case token.LPAREN: + case '(': p.next() x = &Group{pos, p.parseExpression()} - p.expect(token.RPAREN) + p.expect(')') - case token.LBRACK: + case '[': p.next() x = &Option{pos, p.parseExpression()} - p.expect(token.RBRACK) + p.expect(']') - case token.LBRACE: + case '{': p.next() x = &Repetition{pos, p.parseExpression()} - p.expect(token.RBRACE) + p.expect('}') } return x @@ -137,7 +133,7 @@ func (p *parser) parseExpression() Expression { for { list = append(list, p.parseSequence()) - if p.tok != token.OR { + if p.tok != '|' { break } p.next() @@ -154,24 +150,22 @@ func (p *parser) parseExpression() Expression { func (p *parser) parseProduction() *Production { name := p.parseIdentifier() - p.expect(token.ASSIGN) + p.expect('=') var expr Expression - if p.tok != token.PERIOD { + if p.tok != '.' { expr = p.parseExpression() } - p.expect(token.PERIOD) + p.expect('.') return &Production{name, expr} } -func (p *parser) parse(fset *token.FileSet, filename string, src []byte) Grammar { - // initialize parser - p.fset = fset - p.ErrorVector.Reset() - p.scanner.Init(fset.AddFile(filename, fset.Base(), len(src)), src, p, scanner.AllowIllegalChars) +func (p *parser) parse(filename string, src io.Reader) Grammar { + p.scanner.Init(src) + p.scanner.Filename = filename p.next() // initializes pos, tok, lit grammar := make(Grammar) - for p.tok != token.EOF { + for p.tok != scanner.EOF { prod := p.parseProduction() name := prod.Name.String if _, found := grammar[name]; !found { @@ -187,11 +181,11 @@ func (p *parser) parse(fset *token.FileSet, filename string, src []byte) Grammar // Parse parses a set of EBNF productions from source src. // It returns a set of productions. Errors are reported // for incorrect syntax and if a production is declared -// more than once. Position information is recorded relative -// to the file set fset. +// more than once; the filename is used only for error +// positions. // -func Parse(fset *token.FileSet, filename string, src []byte) (Grammar, os.Error) { +func Parse(filename string, src io.Reader) (Grammar, os.Error) { var p parser - grammar := p.parse(fset, filename, src) - return grammar, p.GetError(scanner.Sorted) + grammar := p.parse(filename, src) + return grammar, p.errors.Error() }