src = extractEBNF(src)
}
- grammar, err := ebnf.Parse(fset, filename, src)
+ grammar, err := ebnf.Parse(filename, bytes.NewBuffer(src))
if err != nil {
report(err)
}
- if err = ebnf.Verify(fset, grammar, *start); err != nil {
+ if err = ebnf.Verify(grammar, *start); err != nil {
report(err)
}
}
package ebnf
import (
- "go/scanner"
- "go/token"
+ "fmt"
"os"
+ "scanner"
"unicode"
"utf8"
)
+// ----------------------------------------------------------------------------
+// Error handling
+
+// An errorList accumulates errors found during parsing or
+// verification. It implements os.Error itself (via String, below),
+// so a non-empty list can be returned directly as the error.
+type errorList []os.Error
+
+// Error returns the accumulated errors as a single os.Error,
+// or nil if the list is empty.
+func (list errorList) Error() os.Error {
+ if len(list) == 0 {
+ return nil
+ }
+ return list
+}
+
+// String formats the list for display: "no errors" for an empty
+// list, the single error's message for one error, or the first
+// error followed by a count of the remaining ones.
+func (list errorList) String() string {
+ switch len(list) {
+ case 0:
+ return "no errors"
+ case 1:
+ return list[0].String()
+ }
+ return fmt.Sprintf("%s (and %d more errors)", list[0], len(list)-1)
+}
+
+// newError returns an os.Error whose message is prefixed with the
+// given source position.
+func newError(pos scanner.Position, msg string) os.Error {
+ return os.NewError(fmt.Sprintf("%s: %s", pos, msg))
+}
+
// ----------------------------------------------------------------------------
// Internal representation
// An Expression node represents a production expression.
Expression interface {
// Pos is the position of the first character of the syntactic construct
- Pos() token.Pos
+ Pos() scanner.Position
}
// An Alternative node represents a non-empty list of alternative expressions.
// A Name node represents a production name.
Name struct {
- StringPos token.Pos
+ StringPos scanner.Position
String string
}
// A Token node represents a literal.
Token struct {
- StringPos token.Pos
+ StringPos scanner.Position
String string
}
// A Group node represents a grouped expression.
Group struct {
- Lparen token.Pos
+ Lparen scanner.Position
Body Expression // (body)
}
// An Option node represents an optional expression.
Option struct {
- Lbrack token.Pos
+ Lbrack scanner.Position
Body Expression // [body]
}
// A Repetition node represents a repeated expression.
Repetition struct {
- Lbrace token.Pos
+ Lbrace scanner.Position
Body Expression // {body}
}
- // A Bad node stands for pieces of source code that lead to a parse error.
- Bad struct {
- TokPos token.Pos
- Error string // parser error message
- }
-
// A Production node represents an EBNF production.
Production struct {
Name *Name
Expr Expression
}
+ // A Bad node stands for pieces of source code that lead to a parse error.
+ Bad struct {
+ TokPos scanner.Position
+ Error string // parser error message
+ }
+
// A Grammar is a set of EBNF productions. The map
// is indexed by production name.
//
Grammar map[string]*Production
)
-func (x Alternative) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Alternative
-func (x Sequence) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Sequences
-func (x *Name) Pos() token.Pos { return x.StringPos }
-func (x *Token) Pos() token.Pos { return x.StringPos }
-func (x *Range) Pos() token.Pos { return x.Begin.Pos() }
-func (x *Group) Pos() token.Pos { return x.Lparen }
-func (x *Option) Pos() token.Pos { return x.Lbrack }
-func (x *Repetition) Pos() token.Pos { return x.Lbrace }
-func (x *Bad) Pos() token.Pos { return x.TokPos }
-func (x *Production) Pos() token.Pos { return x.Name.Pos() }
+// Pos implementations for the expression nodes; each reports the
+// position of the first character of its syntactic construct.
+func (x Alternative) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Alternative
+func (x Sequence) Pos() scanner.Position { return x[0].Pos() } // the parser always generates non-empty Sequences
+func (x *Name) Pos() scanner.Position { return x.StringPos }
+func (x *Token) Pos() scanner.Position { return x.StringPos }
+func (x *Range) Pos() scanner.Position { return x.Begin.Pos() }
+func (x *Group) Pos() scanner.Position { return x.Lparen }
+func (x *Option) Pos() scanner.Position { return x.Lbrack }
+func (x *Repetition) Pos() scanner.Position { return x.Lbrace }
+func (x *Production) Pos() scanner.Position { return x.Name.Pos() }
+func (x *Bad) Pos() scanner.Position { return x.TokPos }
// ----------------------------------------------------------------------------
// Grammar verification
}
+// A verifier holds the state for grammar verification: the errors
+// collected so far, a worklist of productions still to be checked,
+// and the set of productions reached from the root.
type verifier struct {
- fset *token.FileSet
- scanner.ErrorVector
+ errors errorList
worklist []*Production
reached Grammar // set of productions reached from (and including) the root production
grammar Grammar
}
-func (v *verifier) error(pos token.Pos, msg string) {
- v.Error(v.fset.Position(pos), msg)
+// error records a verification error at the given position.
+func (v *verifier) error(pos scanner.Position, msg string) {
+ v.errors = append(v.errors, newError(pos, msg))
+}
func (v *verifier) push(prod *Production) {
v.verifyExpr(x.Body, lexical)
case *Repetition:
v.verifyExpr(x.Body, lexical)
+ case *Bad:
+ v.error(x.Pos(), x.Error)
default:
- panic("unreachable")
+ panic(fmt.Sprintf("internal error: unexpected type %T", expr))
}
}
-func (v *verifier) verify(fset *token.FileSet, grammar Grammar, start string) {
+func (v *verifier) verify(grammar Grammar, start string) {
// find root production
root, found := grammar[start]
if !found {
- // token.NoPos doesn't require a file set;
- // ok to set v.fset only afterwards
- v.error(token.NoPos, "no start production "+start)
+ var noPos scanner.Position
+ v.error(noPos, "no start production "+start)
return
}
// initialize verifier
- v.fset = fset
- v.ErrorVector.Reset()
v.worklist = v.worklist[0:0]
v.reached = make(Grammar)
v.grammar = grammar
//
-// Position information is interpreted relative to the file set fset.
+// Position information is reported via the scanner positions
+// recorded in the grammar's nodes.
//
-func Verify(fset *token.FileSet, grammar Grammar, start string) os.Error {
+func Verify(grammar Grammar, start string) os.Error {
var v verifier
- v.verify(fset, grammar, start)
- return v.GetError(scanner.Sorted)
+ v.verify(grammar, start)
+ return v.errors.Error()
}
package ebnf
import (
- "go/token"
- "io/ioutil"
+ "bytes"
"testing"
)
-var fset = token.NewFileSet()
-
var goodGrammars = []string{
`Program = .`,
`Program = {} .`,
}
-func checkGood(t *testing.T, filename string, src []byte) {
- grammar, err := Parse(fset, filename, src)
+// checkGood parses src and verifies the resulting grammar from
+// start production "Program"; any error is reported via t.
+func checkGood(t *testing.T, src string) {
+ grammar, err := Parse("", bytes.NewBuffer([]byte(src)))
if err != nil {
t.Errorf("Parse(%s) failed: %v", src, err)
+ return
}
- if err = Verify(fset, grammar, "Program"); err != nil {
+ if err = Verify(grammar, "Program"); err != nil {
t.Errorf("Verify(%s) failed: %v", src, err)
}
}
-func checkBad(t *testing.T, filename string, src []byte) {
- _, err := Parse(fset, filename, src)
+// checkBad parses src and reports a test error if parsing
+// unexpectedly succeeds.
+func checkBad(t *testing.T, src string) {
+ _, err := Parse("", bytes.NewBuffer([]byte(src)))
if err == nil {
t.Errorf("Parse(%s) should have failed", src)
}
func TestGrammars(t *testing.T) {
for _, src := range goodGrammars {
- checkGood(t, "", []byte(src))
+ checkGood(t, src)
}
for _, src := range badGrammars {
- checkBad(t, "", []byte(src))
- }
-}
-
-var files = []string{
-// TODO(gri) add some test files
-}
-
-func TestFiles(t *testing.T) {
- for _, filename := range files {
- src, err := ioutil.ReadFile(filename)
- if err != nil {
- t.Fatal(err)
- }
- checkGood(t, filename, src)
+ checkBad(t, src)
}
}
package ebnf
import (
- "go/scanner"
- "go/token"
+ "io"
"os"
+ "scanner"
"strconv"
)
+// A parser holds the parsing state: the errors collected so far,
+// the underlying text scanner, and a one-token look-ahead.
type parser struct {
- fset *token.FileSet
- scanner.ErrorVector
+ errors errorList
scanner scanner.Scanner
- pos token.Pos // token position
- tok token.Token // one token look-ahead
- lit string // token literal
+ pos scanner.Position // token position
+ tok int // one token look-ahead
+ lit string // token literal
}
func (p *parser) next() {
- p.pos, p.tok, p.lit = p.scanner.Scan()
- if p.tok.IsKeyword() {
- // TODO Should keyword mapping always happen outside scanner?
- // Or should there be a flag to scanner to enable keyword mapping?
- p.tok = token.IDENT
- }
+ // Advance the one-token look-ahead: token code, its start
+ // position, and its literal text.
+ p.tok = p.scanner.Scan()
+ p.pos = p.scanner.Position
+ p.lit = p.scanner.TokenText()
}
-func (p *parser) error(pos token.Pos, msg string) {
- p.Error(p.fset.Position(pos), msg)
+// error records a parse error at the given position.
+func (p *parser) error(pos scanner.Position, msg string) {
+ p.errors = append(p.errors, newError(pos, msg))
}
-func (p *parser) errorExpected(pos token.Pos, msg string) {
- msg = "expected " + msg
- if pos == p.pos {
+// errorExpected reports that msg was expected at pos; when the error
+// is at the current token it appends what was actually found.
+func (p *parser) errorExpected(pos scanner.Position, msg string) {
+ msg = `expected "` + msg + `"`
+ if pos.Offset == p.pos.Offset {
// the error happened at the current position;
// make the error message more specific
- msg += ", found '" + p.tok.String() + "'"
- if p.tok.IsLiteral() {
+ msg += ", found " + scanner.TokenString(p.tok)
+ // token classes in package scanner are negative values, so a
+ // negative tok carries literal text worth showing (Ident, String, ...)
+ if p.tok < 0 {
msg += " " + p.lit
}
}
p.error(pos, msg)
}
-func (p *parser) expect(tok token.Token) token.Pos {
+// expect consumes the current token if it is tok and returns its
+// position; otherwise it reports an error. It always advances so
+// the parser makes progress.
+func (p *parser) expect(tok int) scanner.Position {
pos := p.pos
if p.tok != tok {
- p.errorExpected(pos, "'"+tok.String()+"'")
+ p.errorExpected(pos, scanner.TokenString(tok))
}
p.next() // make progress in any case
return pos
+// parseIdentifier parses a production name (scanner.Ident) and
+// returns it as a *Name node.
func (p *parser) parseIdentifier() *Name {
pos := p.pos
name := p.lit
- p.expect(token.IDENT)
+ p.expect(scanner.Ident)
return &Name{pos, name}
}
+// parseToken parses a literal (a quoted string) and returns it as a
+// *Token node holding the unquoted value.
func (p *parser) parseToken() *Token {
pos := p.pos
value := ""
- if p.tok == token.STRING {
+ if p.tok == scanner.String {
value, _ = strconv.Unquote(p.lit)
// Unquote may fail with an error, but only if the scanner found
// an illegal string in the first place. In this case the error
// has already been reported.
p.next()
} else {
- p.expect(token.STRING)
+ p.expect(scanner.String)
}
return &Token{pos, value}
}
pos := p.pos
switch p.tok {
- case token.IDENT:
+ case scanner.Ident:
x = p.parseIdentifier()
- case token.STRING:
+ case scanner.String:
tok := p.parseToken()
x = tok
- const ellipsis = "…" // U+2026, the horizontal ellipsis character
- if p.tok == token.ILLEGAL && p.lit == ellipsis {
+ const ellipsis = '…' // U+2026, the horizontal ellipsis character
+ if p.tok == ellipsis {
p.next()
x = &Range{tok, p.parseToken()}
}
- case token.LPAREN:
+ case '(':
p.next()
x = &Group{pos, p.parseExpression()}
- p.expect(token.RPAREN)
+ p.expect(')')
- case token.LBRACK:
+ case '[':
p.next()
x = &Option{pos, p.parseExpression()}
- p.expect(token.RBRACK)
+ p.expect(']')
- case token.LBRACE:
+ case '{':
p.next()
x = &Repetition{pos, p.parseExpression()}
- p.expect(token.RBRACE)
+ p.expect('}')
}
return x
for {
list = append(list, p.parseSequence())
- if p.tok != token.OR {
+ if p.tok != '|' {
break
}
p.next()
+// parseProduction parses a single production of the form
+// name = [ expression ] . and returns it as a *Production node.
func (p *parser) parseProduction() *Production {
name := p.parseIdentifier()
- p.expect(token.ASSIGN)
+ p.expect('=')
var expr Expression
- if p.tok != token.PERIOD {
+ if p.tok != '.' {
expr = p.parseExpression()
}
- p.expect(token.PERIOD)
+ p.expect('.')
return &Production{name, expr}
}
-func (p *parser) parse(fset *token.FileSet, filename string, src []byte) Grammar {
- // initialize parser
- p.fset = fset
- p.ErrorVector.Reset()
- p.scanner.Init(fset.AddFile(filename, fset.Base(), len(src)), src, p, scanner.AllowIllegalChars)
+func (p *parser) parse(filename string, src io.Reader) Grammar {
+ p.scanner.Init(src)
+ p.scanner.Filename = filename
p.next() // initializes pos, tok, lit
grammar := make(Grammar)
- for p.tok != token.EOF {
+ for p.tok != scanner.EOF {
prod := p.parseProduction()
name := prod.Name.String
if _, found := grammar[name]; !found {
// Parse parses a set of EBNF productions from source src.
// It returns a set of productions. Errors are reported
// for incorrect syntax and if a production is declared
-// more than once. Position information is recorded relative
-// to the file set fset.
+// more than once; the filename is used only for error
+// positions.
//
-func Parse(fset *token.FileSet, filename string, src []byte) (Grammar, os.Error) {
+func Parse(filename string, src io.Reader) (Grammar, os.Error) {
var p parser
- grammar := p.parse(fset, filename, src)
- return grammar, p.GetError(scanner.Sorted)
+ grammar := p.parse(filename, src)
+ // errorList.Error yields nil when no errors were collected
+ return grammar, p.errors.Error()
}