crypto/md5.install: hash.install os.install
crypto/sha1.install: hash.install os.install
datafmt.install: bytes.install container/vector.install fmt.install go/scanner.install go/token.install io.install os.install reflect.install runtime.install strconv.install strings.install
+ebnf.install: container/vector.install fmt.install go/scanner.install go/token.install os.install strconv.install strings.install unicode.install utf8.install
exec.install: os.install strings.install
exvar.install: bytes.install fmt.install http.install io.install log.install strconv.install sync.install
flag.install: fmt.install os.install strconv.install
crypto/md5\
crypto/sha1\
datafmt\
+ ebnf\
exec\
exvar\
flag\
crypto/md5\
crypto/sha1\
datafmt\
+ ebnf\
exec\
exvar\
flag\
--- /dev/null
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+
+# DO NOT EDIT. Automatically generated by gobuild.
+# gobuild -m ebnf.go parser.go >Makefile
+
+D=
+
+include $(GOROOT)/src/Make.$(GOARCH)
+AR=gopack
+
+default: packages
+
+clean:
+ rm -rf *.[$(OS)] *.a [$(OS)].out _obj
+
+test: packages
+ gotest
+
+coverage: packages
+ gotest
+ 6cov -g $$(pwd) | grep -v '_test\.go:'
+
+%.$O: %.go
+ $(GC) -I_obj $*.go
+
+%.$O: %.c
+ $(CC) $*.c
+
+%.$O: %.s
+ $(AS) $*.s
+
+O1=\
+ ebnf.$O\
+
+O2=\
+ parser.$O\
+
+
+phases: a1 a2
+_obj$D/ebnf.a: phases
+
+a1: $(O1)
+ $(AR) grc _obj$D/ebnf.a ebnf.$O
+ rm -f $(O1)
+
+a2: $(O2)
+ $(AR) grc _obj$D/ebnf.a parser.$O
+ rm -f $(O2)
+
+
+newpkg: clean
+ mkdir -p _obj$D
+ $(AR) grc _obj$D/ebnf.a
+
+$(O1): newpkg
+$(O2): a1
+$(O3): a2
+
+nuke: clean
+ rm -f $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/ebnf.a
+
+packages: _obj$D/ebnf.a
+
+install: packages
+ test -d $(GOROOT)/pkg && mkdir -p $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D
+ cp _obj$D/ebnf.a $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/ebnf.a
--- /dev/null
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// A library for EBNF grammars. The input is text ([]byte) satisfying
+// the following grammar (represented itself in EBNF):
+//
+// Production = name "=" Expression "." .
+// Expression = Alternative { "|" Alternative } .
+// Alternative = Term { Term } .
+// Term = name | token [ "..." token ] | Group | Option | Repetition .
+// Group = "(" Expression ")" .
+// Option = "[" Expression "]" .
+// Repetition = "{" Expression "}" .
+//
+// A name is a Go identifier, a token is a Go string, and comments
+// and white space follow the same rules as for the Go language.
+// Production names starting with an uppercase Unicode letter denote
+// non-terminal productions (i.e., productions which allow white-space
+// and comments between tokens); all other production names denote
+// lexical productions.
+//
+package ebnf
+
+import (
+ "container/vector";
+ "fmt";
+ "go/scanner";
+ "go/token";
+ "os";
+ "strconv";
+ "strings";
+ "unicode";
+ "utf8";
+)
+
+
+// ----------------------------------------------------------------------------
+// Internal representation
+
+type (
+ // An Expression node represents a production expression.
+ Expression interface {
+ // Pos is the position of the first character of the syntactic construct.
+ Pos() token.Position;
+ };
+
+ // An Alternative node represents a non-empty list of alternative expressions.
+ Alternative []Expression; // x | y | z
+
+ // A Sequence node represents a non-empty list of sequential expressions.
+ Sequence []Expression; // x y z
+
+ // A Name node represents a production name.
+ Name struct {
+ token.Position;
+ String string;
+ };
+
+ // A Token node represents a literal.
+ Token struct {
+ token.Position;
+ String string;
+ };
+
+ // A Range node represents a range of characters.
+ // Its position is the position of its Begin token (see Range.Pos).
+ Range struct {
+ Begin, End *Token; // begin ... end
+ };
+
+ // A Group node represents a grouped expression.
+ Group struct {
+ token.Position;
+ Body Expression; // (body)
+ };
+
+ // An Option node represents an optional expression.
+ Option struct {
+ token.Position;
+ Body Expression; // [body]
+ };
+
+ // A Repetition node represents a repeated expression.
+ Repetition struct {
+ token.Position;
+ Body Expression; // {body}
+ };
+
+ // A Production node represents an EBNF production.
+ Production struct {
+ Name *Name;
+ Expr Expression;
+ };
+
+ // A Grammar is a set of EBNF productions. The map
+ // is indexed by production name.
+ //
+ Grammar map [string] *Production;
+)
+
+
+// Pos returns the position of the first expression in the alternative.
+func (x Alternative) Pos() token.Position {
+ return x[0].Pos(); // the parser always generates non-empty Alternative
+}
+
+
+// Pos returns the position of the first expression in the sequence.
+func (x Sequence) Pos() token.Position {
+ return x[0].Pos(); // the parser always generates non-empty Sequences
+}
+
+
+// Pos returns the position of the range's begin token.
+func (x Range) Pos() token.Position {
+ return x.Begin.Pos();
+}
+
+
+// Pos returns the position of the production's name.
+func (p *Production) Pos() token.Position {
+ return p.Name.Pos();
+}
+
+
+// ----------------------------------------------------------------------------
+// Error handling
+
+// TODO(gri) This is the same code as in datafmt and go/parser.
+// Should factor this out as part of some parsing framework
+// that could also deal with reading various input sources.
+
+// Error describes an individual error. The position Pos, if valid,
+// indicates the format source position the error relates to. The
+// error is specified with the Msg string.
+//
+type Error struct {
+ Pos token.Position;
+ Msg string;
+}
+
+
+// String returns the error message. If the error contains (line, column)
+// position information, it starts with "line:column: ", otherwise it
+// starts with a blank " ".
+//
+func (e *Error) String() string {
+ pos := " ";
+ if e.Pos.IsValid() {
+ pos = fmt.Sprintf("%d:%d: ", e.Pos.Line, e.Pos.Column);
+ }
+ return pos + e.Msg;
+}
+
+
+// An ErrorList is a list of errors encountered during parsing.
+type ErrorList []*Error
+
+
+// ErrorList implements SortInterface and the os.Error interface.
+// Sorting orders the errors by their source offset (see Less).
+
+func (p ErrorList) Len() int { return len(p); }
+func (p ErrorList) Swap(i, j int) { p[i], p[j] = p[j], p[i]; }
+func (p ErrorList) Less(i, j int) bool { return p[i].Pos.Offset < p[j].Pos.Offset; }
+
+
+// String returns the first error's message; if the list holds more
+// than one error, a count of the remaining ones is appended.
+func (p ErrorList) String() string {
+ switch len(p) {
+ case 0:
+ return "unspecified error";
+ case 1:
+ return p[0].String();
+ }
+ return fmt.Sprintf("%s (and %d more errors)", p[0].String(), len(p) - 1);
+}
+
+
+// ----------------------------------------------------------------------------
+// Grammar verification
+
+// isLexical returns whether name denotes a lexical production: by the
+// package convention, production names that do not start with an
+// upper-case letter are lexical.
+func isLexical(name string) bool {
+ ch, _ := utf8.DecodeRuneInString(name); // rune width not needed
+ return !unicode.IsUpper(ch);
+}
+
+
+// A verifier holds the state needed to check a grammar for
+// completeness and consistency (see Verify).
+type verifier struct {
+ errors vector.Vector;
+ worklist vector.Vector;
+ reached Grammar; // set of productions reached from (and including) the root production
+ grammar Grammar;
+}
+
+
+// error records a verification error at the given position.
+func (v *verifier) error(pos token.Position, msg string) {
+ v.errors.Push(&Error{pos, msg});
+}
+
+
+// makeErrorList converts a vector of *Error into an ErrorList;
+// it returns nil if the vector is empty.
+func makeErrorList(v *vector.Vector) os.Error {
+ if v.Len() > 0 {
+ errors := make(ErrorList, v.Len());
+ for i := 0; i < v.Len(); i++ {
+ errors[i] = v.At(i).(*Error);
+ }
+ return errors;
+ }
+ return nil;
+}
+
+
+// push adds prod to the worklist unless it was reached before,
+// so each production is processed at most once.
+func (v *verifier) push(prod *Production) {
+ name := prod.Name.String;
+ if _, found := v.reached[name]; !found {
+ v.worklist.Push(prod);
+ v.reached[name] = prod;
+ }
+}
+
+
+// verifyChar checks that token x is a single character and returns it;
+// otherwise it records an error and returns 0.
+func (v *verifier) verifyChar(x *Token) int {
+ s := x.String;
+ if utf8.RuneCountInString(s) != 1 {
+ v.error(x.Pos(), "single char expected, found " + s);
+ return 0;
+ }
+ ch, _ := utf8.DecodeRuneInString(s);
+ return ch;
+}
+
+
+// verifyExpr checks expr recursively; lexical indicates whether the
+// enclosing production is a lexical production (lower-case name).
+func (v *verifier) verifyExpr(expr Expression, lexical bool) {
+ switch x := expr.(type) {
+ case nil:
+ // empty expression
+ case Alternative:
+ for _, e := range x {
+ v.verifyExpr(e, lexical);
+ }
+ case Sequence:
+ for _, e := range x {
+ v.verifyExpr(e, lexical);
+ }
+ case *Name:
+ // a production with this name must exist;
+ // add it to the worklist if not yet processed
+ if prod, found := v.grammar[x.String]; found {
+ v.push(prod);
+ } else {
+ v.error(x.Pos(), "missing production " + x.String);
+ }
+ // within a lexical production references
+ // to non-lexical productions are invalid
+ if lexical && !isLexical(x.String) {
+ v.error(x.Pos(), "reference to non-lexical production " + x.String);
+ }
+ case *Token:
+ // nothing to do for now
+ case *Range:
+ // both ends must be single characters, and the range must grow
+ // NOTE(review): i >= j also rejects equal-endpoint ranges such as
+ // "a" ... "a" — confirm this is intended.
+ i := v.verifyChar(x.Begin);
+ j := v.verifyChar(x.End);
+ if i >= j {
+ v.error(x.Pos(), "decreasing character range");
+ }
+ case *Group:
+ v.verifyExpr(x.Body, lexical);
+ case *Option:
+ v.verifyExpr(x.Body, lexical);
+ case *Repetition:
+ v.verifyExpr(x.Body, lexical);
+ default:
+ panic("unreachable");
+ }
+}
+
+
+// verify checks the grammar beginning at the start production;
+// all problems found are collected in v.errors.
+func (v *verifier) verify(grammar Grammar, start string) {
+ // find root production
+ root, found := grammar[start];
+ if !found {
+ var noPos token.Position;
+ v.error(noPos, "no start production " + start);
+ return;
+ }
+
+ // initialize verifier
+ v.errors.Init(0);
+ v.worklist.Init(0);
+ v.reached = make(Grammar);
+ v.grammar = grammar;
+
+ // work through the worklist
+ v.push(root);
+ for v.worklist.Len() > 0 {
+ prod := v.worklist.Pop().(*Production);
+ v.verifyExpr(prod.Expr, isLexical(prod.Name.String));
+ }
+
+ // check if all productions were reached
+ if len(v.reached) < len(v.grammar) {
+ for name, prod := range v.grammar {
+ if _, found := v.reached[name]; !found {
+ v.error(prod.Pos(), name + " is unreachable");
+ }
+ }
+ }
+}
+
+
+// Verify checks that:
+// - all productions used are defined
+// - all productions defined are used when beginning at start
+// - lexical productions refer only to other lexical productions
+//
+func Verify(grammar Grammar, start string) os.Error {
+ var v verifier;
+ v.verify(grammar, start);
+ return makeErrorList(&v.errors);
+}
--- /dev/null
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ebnf
+
+import (
+ "ebnf";
+ "io";
+ "strings";
+ "testing";
+)
+
+
+// grammars lists EBNF sources that must parse and verify without
+// errors (the start production is always "Program"; see check).
+var grammars = []string {
+ `Program = .
+ `,
+
+ `Program = foo .
+ foo = "foo" .
+ `,
+
+ `Program = "a" | "b" "c" .
+ `,
+
+ `Program = "a" ... "z" .
+ `,
+
+ `Program = Song .
+ Song = { Note } .
+ Note = Do | (Re | Mi | Fa | So | La) | Ti .
+ Do = "c" .
+ Re = "d" .
+ Mi = "e" .
+ Fa = "f" .
+ So = "g" .
+ La = "a" .
+ Ti = ti .
+ ti = "b" .
+ `,
+}
+
+
+// check parses src and verifies the resulting grammar with start
+// production "Program", reporting any failures via t.
+func check(t *testing.T, src []byte) {
+ grammar, err := Parse(src);
+ if err != nil {
+ t.Errorf("Parse(%s) failed: %v", src, err);
+ return; // grammar is incomplete; verifying it would add spurious errors
+ }
+ if err = Verify(grammar, "Program"); err != nil {
+ t.Errorf("Verify(%s) failed: %v", src, err);
+ }
+}
+
+
+// TestGrammars runs check over every in-memory grammar in grammars.
+func TestGrammars(t *testing.T) {
+ for _, src := range grammars {
+ check(t, strings.Bytes(src));
+ }
+}
+
+
+// files lists grammar files to be checked by TestFiles.
+var files = []string {
+ // TODO(gri) add some test files
+}
+
+
+// TestFiles runs check over the contents of every file in files.
+func TestFiles(t *testing.T) {
+ for _, filename := range files {
+ src, err := io.ReadFile(filename);
+ if err != nil {
+ t.Fatal(err);
+ }
+ check(t, src);
+ }
+}
--- /dev/null
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ebnf
+
+import (
+ "container/vector";
+ "ebnf";
+ "fmt";
+ "go/scanner";
+ "go/token";
+ "os";
+ "strconv";
+ "strings";
+ "unicode";
+ "utf8";
+)
+
+
+// A parser holds the scanner and one-token look-ahead state used
+// while parsing a grammar.
+type parser struct {
+ errors vector.Vector;
+ scanner scanner.Scanner;
+ pos token.Position; // token position
+ tok token.Token; // one token look-ahead
+ lit []byte; // token literal
+}
+
+
+// next advances to the next token; Go keywords are mapped to IDENT
+// so they can be used as ordinary production names.
+func (p *parser) next() {
+ p.pos, p.tok, p.lit = p.scanner.Scan();
+ if p.tok.IsKeyword() {
+ // TODO Should keyword mapping always happen outside scanner?
+ // Or should there be a flag to scanner to enable keyword mapping?
+ p.tok = token.IDENT;
+ }
+}
+
+
+// init prepares the parser for reading src and loads the first token.
+func (p *parser) init(src []byte) {
+ p.errors.Init(0);
+ p.scanner.Init(src, p, 0);
+ p.next(); // initializes pos, tok, lit
+}
+
+
+// The parser implements scanner.Error.
+func (p *parser) Error(pos token.Position, msg string) {
+ // Do not collect errors that are on the same line as the previous
+ // error to reduce the number of spurious errors due to incorrect
+ // parser synchronization.
+ if p.errors.Len() == 0 || p.errors.Last().(*Error).Pos.Line != pos.Line {
+ p.errors.Push(&Error{pos, msg});
+ }
+}
+
+
+// errorExpected reports that msg was expected at pos; if the error is
+// at the current token, that token is included in the message.
+func (p *parser) errorExpected(pos token.Position, msg string) {
+ msg = "expected " + msg;
+ if pos.Offset == p.pos.Offset {
+ // the error happened at the current position;
+ // make the error message more specific
+ msg += ", found '" + p.tok.String() + "'";
+ if p.tok.IsLiteral() {
+ msg += " " + string(p.lit);
+ }
+ }
+ p.Error(pos, msg);
+}
+
+
+// expect consumes the current token if it is tok and reports an error
+// otherwise; it returns the token's position and always advances.
+func (p *parser) expect(tok token.Token) token.Position {
+ pos := p.pos;
+ if p.tok != tok {
+ p.errorExpected(pos, "'" + tok.String() + "'");
+ }
+ p.next(); // make progress in any case
+ return pos;
+}
+
+
+// parseIdentifier parses a production or reference name.
+func (p *parser) parseIdentifier() *Name {
+ pos := p.pos;
+ name := string(p.lit);
+ p.expect(token.IDENT);
+ return &Name{pos, name};
+}
+
+
+// parseToken parses a string literal and returns it unquoted.
+func (p *parser) parseToken() *Token {
+ pos := p.pos;
+ value := "";
+ if p.tok == token.STRING {
+ var err os.Error;
+ value, err = strconv.Unquote(string(p.lit));
+ // Unquote may fail with an error, but only if the scanner found
+ // an illegal string in the first place. In this case the error
+ // has already been reported.
+ p.next();
+ } else {
+ p.expect(token.STRING);
+ }
+ return &Token{pos, value};
+}
+
+
+// forward declaration (parseTerm and parseExpression are mutually recursive)
+func (p *parser) parseExpression() Expression
+
+// parseTerm parses a single term; it returns nil if no term was found.
+func (p *parser) parseTerm() (x Expression) {
+ pos := p.pos;
+
+ switch p.tok {
+ case token.IDENT:
+ x = p.parseIdentifier();
+
+ case token.STRING:
+ tok := p.parseToken();
+ x = tok;
+ if p.tok == token.ELLIPSIS {
+ p.next();
+ x = &Range{tok, p.parseToken()};
+ }
+
+ case token.LPAREN:
+ p.next();
+ x = &Group{pos, p.parseExpression()};
+ p.expect(token.RPAREN);
+
+ case token.LBRACK:
+ p.next();
+ x = &Option{pos, p.parseExpression()};
+ p.expect(token.RBRACK);
+
+ case token.LBRACE:
+ p.next();
+ x = &Repetition{pos, p.parseExpression()};
+ p.expect(token.RBRACE);
+ }
+
+ return x;
+}
+
+
+// parseSequence parses a list of terms; it returns nil for an empty
+// list and the expression itself for a single-element list.
+func (p *parser) parseSequence() Expression {
+ var list vector.Vector;
+ list.Init(0);
+
+ for x := p.parseTerm(); x != nil; x = p.parseTerm() {
+ list.Push(x);
+ }
+
+ // no need for a sequence if list.Len() < 2
+ switch list.Len() {
+ case 0:
+ return nil;
+ case 1:
+ return list.At(0).(Expression);
+ }
+
+ // convert list into a sequence
+ seq := make(Sequence, list.Len());
+ for i := 0; i < list.Len(); i++ {
+ seq[i] = list.At(i).(Expression);
+ }
+ return seq;
+}
+
+
+// parseExpression parses alternatives separated by "|"; it returns
+// nil for an empty expression and the single alternative if there is
+// only one.
+func (p *parser) parseExpression() Expression {
+ var list vector.Vector;
+ list.Init(0);
+
+ for {
+ x := p.parseSequence();
+ if x != nil {
+ list.Push(x);
+ }
+ if p.tok != token.OR {
+ break;
+ }
+ p.next();
+ }
+
+ // no need for an Alternative node if list.Len() < 2
+ switch list.Len() {
+ case 0:
+ return nil;
+ case 1:
+ return list.At(0).(Expression);
+ }
+
+ // convert list into an Alternative node
+ alt := make(Alternative, list.Len());
+ for i := 0; i < list.Len(); i++ {
+ alt[i] = list.At(i).(Expression);
+ }
+ return alt;
+}
+
+
+// parseProduction parses one production: name "=" Expression "." .
+func (p *parser) parseProduction() *Production {
+ name := p.parseIdentifier();
+ p.expect(token.ASSIGN);
+ expr := p.parseExpression();
+ p.expect(token.PERIOD);
+ return &Production{name, expr};
+}
+
+
+// parse reads all productions from src and returns them as a Grammar;
+// syntax errors and duplicate declarations are collected in p.errors.
+func (p *parser) parse(src []byte) Grammar {
+ p.init(src); // sets up errors, scanner, and the one-token look-ahead
+
+ grammar := make(Grammar);
+ for p.tok != token.EOF {
+ prod := p.parseProduction();
+ name := prod.Name.String;
+ if _, found := grammar[name]; !found {
+ grammar[name] = prod;
+ } else {
+ p.Error(prod.Pos(), name + " declared already");
+ }
+ }
+
+ return grammar;
+}
+
+
+// Parse parses a set of EBNF productions from source src.
+// It returns a set of productions. Errors are reported
+// for incorrect syntax and if a production is declared
+// more than once. The (possibly partial) grammar is
+// returned even if errors occurred.
+//
+func Parse(src []byte) (Grammar, os.Error) {
+ var p parser;
+ grammar := p.parse(src);
+ return grammar, makeErrorList(&p.errors);
+}