src = extractEBNF(src)
}
- grammar, err := ebnf.Parse(fset, filename, src)
+ grammar, err := ebnf.Parse(filename, bytes.NewBuffer(src))
if err != nil {
report(err)
}
- if err = ebnf.Verify(fset, grammar, *start); err != nil {
+ if err = ebnf.Verify(grammar, *start); err != nil {
report(err)
}
}
package ebnf
import (
- "go/scanner"
- "go/token"
+ "fmt"
"os"
+ "scanner"
"unicode"
"utf8"
)
+// ----------------------------------------------------------------------------
+// Error handling
+
+// An errorList accumulates errors found during parsing or
+// verification. It implements os.Error itself (via String, below),
+// so a non-empty list can be returned directly as the error.
+type errorList []os.Error
+
+// Error returns the accumulated errors as a single os.Error,
+// or nil if the list is empty.
+func (list errorList) Error() os.Error {
+ if len(list) == 0 {
+ return nil
+ }
+ return list
+}
+
+// String formats the list for display: "no errors" for an empty
+// list, the single error's message for one error, or the first
+// error followed by a count of the remaining ones.
+func (list errorList) String() string {
+ switch len(list) {
+ case 0:
+ return "no errors"
+ case 1:
+ return list[0].String()
+ }
+ return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1)
+}
+
+// newError returns an os.Error whose message is prefixed with the
+// given source position.
+func newError(pos scanner.Position, msg string) os.Error {
+ return os.NewError(fmt.Sprintf("%s: %s", pos, msg))
+}
+
// ----------------------------------------------------------------------------
// Internal representation
// An Expression node represents a production expression.
Expression interface {
// Pos is the position of the first character of the syntactic construct
- Pos() token.Pos
+ Pos() scanner.Position
}
// An Alternative node represents a non-empty list of alternative expressions.
// A Name node represents a production name.
Name struct {
- StringPos token.Pos
+ StringPos scanner.Position
String string
}
// A Token node represents a literal.
Token struct {
- StringPos token.Pos
+ StringPos scanner.Position
String string
}
// A Group node represents a grouped expression.
Group struct {
- Lparen token.Pos
+ Lparen scanner.Position
Body Expression // (body)
}
// An Option node represents an optional expression.
Option struct {
- Lbrack token.Pos
+ Lbrack scanner.Position
Body Expression // [body]
}
// A Repetition node represents a repeated expression.
Repetition struct {
- Lbrace token.Pos
+ Lbrace scanner.Position
Body Expression // {body}
}
- // A Bad node stands for pieces of source code that lead to a parse error.
- Bad struct {
- TokPos token.Pos
- Error string // parser error message
- }
-
// A Production node represents an EBNF production.
Production struct {
Name *Name
Expr Expression
}
+ // A Bad node stands for pieces of source code that lead to a parse error.
+ Bad struct {
+ TokPos scanner.Position
+ Error string // parser error message
+ }
+
// A Grammar is a set of EBNF productions. The map
// is indexed by production name.
//
Grammar map[string]*Production
)
-func (x Alternative) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Alternative
-func (x Sequence) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Sequences
-func (x *Name) Pos() token.Pos { return x.StringPos }
-func (x *Token) Pos() token.Pos { return x.StringPos }
-func (x *Range) Pos() token.Pos { return x.Begin.Pos() }
-func (x *Group) Pos() token.Pos { return x.Lparen }
-func (x *Option) Pos() token.Pos { return x.Lbrack }
-func (x *Repetition) Pos() token.Pos { return x.Lbrace }
-func (x *Bad) Pos() token.Pos { return x.TokPos }
-func (x *Production) Pos() token.Pos { return x.Name.Pos() }
+// Pos implementations for the expression nodes; each reports the
+// position of the first character of its syntactic construct.
+func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative
+func (x Sequence) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Sequences
+func (x *Name) Pos() scanner.Position { return x.StringPos }
+func (x *Token) Pos() scanner.Position { return x.StringPos }
+func (x *Range) Pos() scanner.Position { return x.Begin.Pos() }
+func (x *Group) Pos() scanner.Position { return x.Lparen }
+func (x *Option) Pos() scanner.Position { return x.Lbrack }
+func (x *Repetition) Pos() scanner.Position { return x.Lbrace }
+func (x *Production) Pos() scanner.Position { return x.Name.Pos() }
+func (x *Bad) Pos() scanner.Position { return x.TokPos }
// ----------------------------------------------------------------------------
// Grammar verification
}
+// A verifier holds the state for grammar verification: the errors
+// collected so far, a worklist of productions still to be checked,
+// and the set of productions reached from the root.
type verifier struct {
- fset *token.FileSet
- scanner.ErrorVector
+ errors errorList
worklist []*Production
reached Grammar // set of productions reached from (and including) the root production
grammar Grammar
}
-func (v *verifier) error(pos token.Pos, msg string) {
- v.Error(v.fset.Position(pos), msg)
+// error records a verification error at the given position.
+func (v *verifier) error(pos scanner.Position, msg string) {
+ v.errors = append(v.errors, newError(pos, msg))
+}
func (v *verifier) push(prod *Production) {
v.verifyExpr(x.Body, lexical)
case *Repetition:
v.verifyExpr(x.Body, lexical)
+ case *Bad:
+ v.error(x.Pos(), x.Error)
default:
- panic("unreachable")
+ panic(fmt.Sprintf("internal error: unexpected type %T", expr))
}
}
-func (v *verifier) verify(fset *token.FileSet, grammar Grammar, start string) {
+func (v *verifier) verify(grammar Grammar, start string) {
// find root production
root, found := grammar[start]
if !found {
- // token.NoPos doesn't require a file set;
- // ok to set v.fset only afterwards
- v.error(token.NoPos, "no start production "+start)
+ var noPos scanner.Position
+ v.error(noPos, "no start production "+start)
return
}
// initialize verifier
- v.fset = fset
- v.ErrorVector.Reset()
v.worklist = v.worklist[0:0]
v.reached = make(Grammar)
v.grammar = grammar
//
-// Position information is interpreted relative to the file set fset.
+// Position information is reported via the scanner positions
+// recorded in the grammar's nodes.
//
-func Verify(fset *token.FileSet, grammar Grammar, start string) os.Error {
+func Verify(grammar Grammar, start string) os.Error {
var v verifier
- v.verify(fset, grammar, start)
- return v.GetError(scanner.Sorted)
+ v.verify(grammar, start)
+ return v.errors.Error()
}
package ebnf
import (
- "go/token"
- "io/ioutil"
+ "bytes"
"testing"
)
-var fset = token.NewFileSet()
-
var goodGrammars = []string{
`Program = .`,
`Program = {} .`,
}
-func checkGood(t *testing.T, filename string, src []byte) {
- grammar, err := Parse(fset, filename, src)
+// checkGood parses src and verifies the resulting grammar from
+// start production "Program"; any error is reported via t.
+func checkGood(t *testing.T, src string) {
+ grammar, err := Parse("", bytes.NewBuffer([]byte(src)))
if err != nil {
t.Errorf("Parse(%s) failed: %v", src, err)
+ return
}
- if err = Verify(fset, grammar, "Program"); err != nil {
+ if err = Verify(grammar, "Program"); err != nil {
t.Errorf("Verify(%s) failed: %v", src, err)
}
}
-func checkBad(t *testing.T, filename string, src []byte) {
- _, err := Parse(fset, filename, src)
+// checkBad parses src and reports a test error if parsing
+// unexpectedly succeeds.
+func checkBad(t *testing.T, src string) {
+ _, err := Parse("", bytes.NewBuffer([]byte(src)))
if err == nil {
t.Errorf("Parse(%s) should have failed", src)
}
func TestGrammars(t *testing.T) {
for _, src := range goodGrammars {
- checkGood(t, "", []byte(src))
+ checkGood(t, src)
}
for _, src := range badGrammars {
- checkBad(t, "", []byte(src))
- }
-}
-
-var files = []string{
-// TODO(gri) add some test files
-}
-
-func TestFiles(t *testing.T) {
- for _, filename := range files {
- src, err := ioutil.ReadFile(filename)
- if err != nil {
- t.Fatal(err)
- }
- checkGood(t, filename, src)
+ checkBad(t, src)
}
}
package ebnf
import (
- "go/scanner"
- "go/token"
+ "io"
"os"
+ "scanner"
"strconv"
)
+// A parser holds the parsing state: the errors collected so far,
+// the underlying text scanner, and a one-token look-ahead.
type parser struct {
- fset *token.FileSet
- scanner.ErrorVector
+ errors errorList
scanner scanner.Scanner
- pos token.Pos // token position
- tok token.Token // one token look-ahead
- lit string // token literal
+ pos scanner.Position // token position
+ tok int // one token look-ahead
+ lit string // token literal
}
func (p *parser) next() {
- p.pos, p.tok, p.lit = p.scanner.Scan()
- if p.tok.IsKeyword() {
- // TODO Should keyword mapping always happen outside scanner?
- // Or should there be a flag to scanner to enable keyword mapping?
- p.tok = token.IDENT
- }
+ // Advance the one-token look-ahead: token code, its start
+ // position, and its literal text.
+ p.tok = p.scanner.Scan()
+ p.pos = p.scanner.Position
+ p.lit = p.scanner.TokenText()
}
-func (p *parser) error(pos token.Pos, msg string) {
- p.Error(p.fset.Position(pos), msg)
+// error records a parse error at the given position.
+func (p *parser) error(pos scanner.Position, msg string) {
+ p.errors = append(p.errors, newError(pos, msg))
}
-func (p *parser) errorExpected(pos token.Pos, msg string) {
- msg = "expected " + msg
- if pos == p.pos {
+// errorExpected reports that msg was expected at pos; when the error
+// is at the current token it appends what was actually found.
+func (p *parser) errorExpected(pos scanner.Position, msg string) {
+ msg = `expected "` + msg + `"`
+ if pos.Offset == p.pos.Offset {
// the error happened at the current position;
// make the error message more specific
- msg += ", found '" + p.tok.String() + "'"
- if p.tok.IsLiteral() {
+ msg += ", found " + scanner.TokenString(p.tok)
+ // token classes in package scanner are negative values, so a
+ // negative tok carries literal text worth showing (Ident, String, ...)
+ if p.tok < 0 {
msg += " " + p.lit
}
}
p.error(pos, msg)
}
-func (p *parser) expect(tok token.Token) token.Pos {
+// expect consumes the current token if it is tok and returns its
+// position; otherwise it reports an error. It always advances so
+// the parser makes progress.
+func (p *parser) expect(tok int) scanner.Position {
pos := p.pos
if p.tok != tok {
- p.errorExpected(pos, "'"+tok.String()+"'")
+ p.errorExpected(pos, scanner.TokenString(tok))
}
p.next() // make progress in any case
return pos
+// parseIdentifier parses a production name (scanner.Ident) and
+// returns it as a *Name node.
func (p *parser) parseIdentifier() *Name {
pos := p.pos
name := p.lit
- p.expect(token.IDENT)
+ p.expect(scanner.Ident)
return &Name{pos, name}
}
+// parseToken parses a literal (a quoted string) and returns it as a
+// *Token node holding the unquoted value.
func (p *parser) parseToken() *Token {
pos := p.pos
value := ""
- if p.tok == token.STRING {
+ if p.tok == scanner.String {
value, _ = strconv.Unquote(p.lit)
// Unquote may fail with an error, but only if the scanner found
// an illegal string in the first place. In this case the error
// has already been reported.
p.next()
} else {
- p.expect(token.STRING)
+ p.expect(scanner.String)
}
return &Token{pos, value}
}
pos := p.pos
switch p.tok {
- case token.IDENT:
+ case scanner.Ident:
x = p.parseIdentifier()
- case token.STRING:
+ case scanner.String:
tok := p.parseToken()
x = tok
- const ellipsis = "…" // U+2026, the horizontal ellipsis character
- if p.tok == token.ILLEGAL && p.lit == ellipsis {
+ const ellipsis = '…' // U+2026, the horizontal ellipsis character
+ if p.tok == ellipsis {
p.next()
x = &Range{tok, p.parseToken()}
}
- case token.LPAREN:
+ case '(':
p.next()
x = &Group{pos, p.parseExpression()}
- p.expect(token.RPAREN)
+ p.expect(')')
- case token.LBRACK:
+ case '[':
p.next()
x = &Option{pos, p.parseExpression()}
- p.expect(token.RBRACK)
+ p.expect(']')
- case token.LBRACE:
+ case '{':
p.next()
x = &Repetition{pos, p.parseExpression()}
- p.expect(token.RBRACE)
+ p.expect('}')
}
return x
for {
list = append(list, p.parseSequence())
- if p.tok != token.OR {
+ if p.tok != '|' {
break
}
p.next()
+// parseProduction parses a single production of the form
+// name = [ expression ] . and returns it as a *Production node.
func (p *parser) parseProduction() *Production {
name := p.parseIdentifier()
- p.expect(token.ASSIGN)
+ p.expect('=')
var expr Expression
- if p.tok != token.PERIOD {
+ if p.tok != '.' {
expr = p.parseExpression()
}
- p.expect(token.PERIOD)
+ p.expect('.')
return &Production{name, expr}
}
-func (p *parser) parse(fset *token.FileSet, filename string, src []byte) Grammar {
- // initialize parser
- p.fset = fset
- p.ErrorVector.Reset()
- p.scanner.Init(fset.AddFile(filename, fset.Base(), len(src)), src, p, scanner.AllowIllegalChars)
+func (p *parser) parse(filename string, src io.Reader) Grammar {
+ p.scanner.Init(src)
+ p.scanner.Filename = filename
p.next() // initializes pos, tok, lit
grammar := make(Grammar)
- for p.tok != token.EOF {
+ for p.tok != scanner.EOF {
prod := p.parseProduction()
name := prod.Name.String
if _, found := grammar[name]; !found {
// Parse parses a set of EBNF productions from source src.
// It returns a set of productions. Errors are reported
// for incorrect syntax and if a production is declared
-// more than once. Position information is recorded relative
-// to the file set fset.
+// more than once; the filename is used only for error
+// positions.
//
-func Parse(fset *token.FileSet, filename string, src []byte) (Grammar, os.Error) {
+func Parse(filename string, src io.Reader) (Grammar, os.Error) {
var p parser
- grammar := p.parse(fset, filename, src)
- return grammar, p.GetError(scanner.Sorted)
+ grammar := p.parse(filename, src)
+ // errorList.Error yields nil when no errors were collected
+ return grammar, p.errors.Error()
}