From: Robert Griesemer Date: Sat, 28 Mar 2009 02:27:09 +0000 (-0700) Subject: Significant parser cleanup: X-Git-Tag: weekly.2009-11-06~1948 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=75a5d6cd2dab313f21face3decfaa4f5be23d088;p=gostls13.git Significant parser cleanup: - commented public interface - much better and very precise error messages - much better tracing output - many more checks (still permits more than just syntactically legal programs, but much more is checked that can be checked w/o semantic information) - updated with respect to updated AST - general cleanup throughout Parser almost ready for move into lib/go. R=r OCL=26853 CL=26855 --- diff --git a/usr/gri/pretty/compilation.go b/usr/gri/pretty/compilation.go index 17906c50e1..c068ce8409 100644 --- a/usr/gri/pretty/compilation.go +++ b/usr/gri/pretty/compilation.go @@ -97,11 +97,11 @@ func Compile(src_file string, flags *Flags) (*ast.Package, ErrorList) { var scanner scanner.Scanner; scanner.Init(src, &err, true); - pflags := uint(0); + mode := uint(0); if flags.Verbose { - pflags |= parser.Trace; + mode |= parser.Trace; } - prog, nerrs := parser.Parse(&scanner, &err, parser.ParseEntirePackage, pflags); + prog, nerrs := parser.Parse(&scanner, &err, mode); if err.errors.Len() == 0 { TypeChecker.CheckProgram(&err, prog); diff --git a/usr/gri/pretty/parser.go b/usr/gri/pretty/parser.go index 92ff4622a6..97fea2c479 100644 --- a/usr/gri/pretty/parser.go +++ b/usr/gri/pretty/parser.go @@ -21,8 +21,7 @@ import ( // The parser calls Scan() repeatedly until token.EOF is returned. // Scan must return the current token position pos, the token value // tok, and the corresponding token literal string lit; lit can be -// undefined/nil unless the token is a literal (i.e., tok.IsLiteral() -// is true). +// undefined/nil unless the token is a literal (tok.IsLiteral() == true). 
// type Scanner interface { Scan() (pos token.Position, tok token.Token, lit []byte); @@ -39,14 +38,6 @@ type ErrorHandler interface { } -// The following flags control optional parser functionality. A set of -// flags (or 0) must be provided as a parameter to the Parse function. -// -const ( - Trace = 1 << iota; // print a trace of parsed productions -) - - type interval struct { beg, end int; } @@ -59,8 +50,9 @@ type parser struct { errorCount int; // Tracing/debugging - trace bool; - indent uint; + mode uint; // parsing mode + trace bool; // == (mode & Trace != 0) + indent uint; // indentation used for tracing output // Comments comments vector.Vector; // list of collected, unassociated comments @@ -77,37 +69,31 @@ type parser struct { }; -// When we don't have a position use nopos. -// TODO make sure we always have a position. -var nopos token.Position; - - -// ---------------------------------------------------------------------------- -// Helper functions - -func unreachable() { - panic("unreachable"); -} +// noPos is used when there is no corresponding source position for a token +var noPos token.Position; // ---------------------------------------------------------------------------- // Parsing support -func (p *parser) printIndent() { - i := p.indent; - // reduce printing time by a factor of 2 or more - for ; i > 10; i -= 10 { - fmt.Printf(". . . . . . . . . . "); - } - for ; i > 0; i-- { - fmt.Printf(". "); +func (p *parser) printTrace(a ...) { + const dots = + ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . " + ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
"; + const n = uint(len(dots)); + + fmt.Printf("%5d:%3d: ", p.pos.Line, p.pos.Column); + i := 2*p.indent; + for ; i > n; i -= n { + fmt.Print(dots[0 : i%n]); } + fmt.Print(dots[0 : i]); + fmt.Println(a); } func trace(p *parser, msg string) *parser { - p.printIndent(); - fmt.Printf("%s (\n", msg); + p.printTrace(msg, "("); p.indent++; return p; } @@ -115,35 +101,35 @@ func trace(p *parser, msg string) *parser { func un/*trace*/(p *parser) { p.indent--; - p.printIndent(); - fmt.Printf(")\n"); + p.printTrace(")"); } func (p *parser) next0() { - p.pos, p.tok, p.lit = p.scanner.Scan(); - p.opt_semi = false; - - if p.trace { - p.printIndent(); - switch p.tok { - case token.IDENT, token.INT, token.FLOAT, token.CHAR, token.STRING: - fmt.Printf("%d:%d: %s = %s\n", p.pos.Line, p.pos.Column, p.tok.String(), p.lit); - case token.LPAREN: - // don't print '(' - screws up selection in terminal window - fmt.Printf("%d:%d: LPAREN\n", p.pos.Line, p.pos.Column); - case token.RPAREN: - // don't print ')' - screws up selection in terminal window - fmt.Printf("%d:%d: RPAREN\n", p.pos.Line, p.pos.Column); + // Because of one-token look-ahead, print the previous token + // when tracing as it provides a more readable output. The + // very first token (p.pos.Line == 0) is not initialized (it + // is token.ILLEGAL), so don't print it . + if p.trace && p.pos.Line > 0 { + s := p.tok.String(); + switch { + case p.tok.IsLiteral(): + p.printTrace(s, string(p.lit)); + case p.tok.IsOperator(), p.tok.IsKeyword(): + p.printTrace("\"" + s + "\""); default: - fmt.Printf("%d:%d: %s\n", p.pos.Line, p.pos.Column, p.tok.String()); + p.printTrace(s); } } + + p.pos, p.tok, p.lit = p.scanner.Scan(); + p.opt_semi = false; } // Collect a comment in the parser's comment list and return the line // on which the comment ends. +// func (p *parser) collectComment() int { // For /*-style comments, the comment may end on a different line. // Scan the comment for '\n' chars and adjust the end line accordingly. 
@@ -176,6 +162,30 @@ func (p *parser) getComments() interval { } +func (p *parser) getDoc() ast.Comments { + doc := p.last_doc; + n := doc.end - doc.beg; + + if n <= 0 || p.comments.At(doc.end - 1).(*ast.Comment).EndLine + 1 < p.pos.Line { + // no comments or empty line between last comment and current token; + // do not use as documentation + return nil; + } + + // found immediately adjacent comment interval; + // use as documentation + c := make(ast.Comments, n); + for i := 0; i < n; i++ { + c[i] = p.comments.At(doc.beg + i).(*ast.Comment); + } + + // remove comments from the general list + p.comments.Cut(doc.beg, doc.end); + + return c; +} + + func (p *parser) next() { p.next0(); p.last_doc = interval{0, 0}; @@ -193,41 +203,27 @@ func (p *parser) error(pos token.Position, msg string) { } -func (p *parser) expect(tok token.Token) token.Position { - if p.tok != tok { - msg := "expected '" + tok.String() + "', found '" + p.tok.String() + "'"; +func (p *parser) error_expected(pos token.Position, msg string) { + msg = "expected " + msg; + if pos.Offset == p.pos.Offset { + // the error happened at the current position; + // make the error message more specific + msg += ", found '" + p.tok.String() + "'"; if p.tok.IsLiteral() { msg += " " + string(p.lit); } - p.error(p.pos, msg); } - pos := p.pos; - p.next(); // make progress in any case - return pos; + p.error(pos, msg); } -func (p *parser) getDoc() ast.Comments { - doc := p.last_doc; - n := doc.end - doc.beg; - - if n <= 0 || p.comments.At(doc.end - 1).(*ast.Comment).EndLine + 1 < p.pos.Line { - // no comments or empty line between last comment and current token; - // do not use as documentation - return nil; - } - - // found immediately adjacent comment interval; - // use as documentation - c := make(ast.Comments, n); - for i := 0; i < n; i++ { - c[i] = p.comments.At(doc.beg + i).(*ast.Comment); +func (p *parser) expect(tok token.Token) token.Position { + pos := p.pos; + if p.tok != tok { + p.error_expected(pos, "'" 
+ tok.String() + "'"); } - - // remove comments from the general list - p.comments.Cut(doc.beg, doc.end); - - return c; + p.next(); // make progress in any case + return pos; } @@ -235,23 +231,19 @@ func (p *parser) getDoc() ast.Comments { // Common productions func (p *parser) tryType() ast.Expr; -func (p *parser) parseExpression(prec int) ast.Expr; +func (p *parser) parseStringList(x *ast.StringLit) []*ast.StringLit +func (p *parser) parseExpression() ast.Expr; func (p *parser) parseStatement() ast.Stmt; func (p *parser) parseDeclaration() ast.Decl; func (p *parser) parseIdent() *ast.Ident { - if p.trace { - defer un(trace(p, "Ident")); - } - if p.tok == token.IDENT { x := &ast.Ident{p.pos, p.lit}; p.next(); return x; } p.expect(token.IDENT); // use expect() error handling - return &ast.Ident{p.pos, [0]byte{}}; } @@ -287,10 +279,10 @@ func (p *parser) parseExpressionList() []ast.Expr { } list := vector.New(0); - list.Push(p.parseExpression(1)); + list.Push(p.parseExpression()); for p.tok == token.COMMA { p.next(); - list.Push(p.parseExpression(1)); + list.Push(p.parseExpression()); } // convert list @@ -312,9 +304,10 @@ func (p *parser) parseType() ast.Expr { } typ := p.tryType(); + if typ == nil { - p.error(p.pos, "type expected"); - typ = &ast.BadExpr{p.pos}; + p.error_expected(p.pos, "type"); + return &ast.BadExpr{p.pos}; } return typ; @@ -327,7 +320,8 @@ func (p *parser) parseQualifiedIdent() ast.Expr { } var x ast.Expr = p.parseIdent(); - for p.tok == token.PERIOD { + if p.tok == token.PERIOD { + // first identifier is a package identifier p.next(); sel := p.parseIdent(); x = &ast.SelectorExpr{x, sel}; @@ -345,67 +339,164 @@ func (p *parser) parseTypeName() ast.Expr { } -func (p *parser) parseArrayType() *ast.ArrayType { +func (p *parser) parseArrayOrSliceType(ellipsis_ok bool) ast.Expr { if p.trace { - defer un(trace(p, "ArrayType")); + defer un(trace(p, "ArrayOrSliceType")); } lbrack := p.expect(token.LBRACK); var len ast.Expr; - if p.tok == token.ELLIPSIS 
{ + if ellipsis_ok && p.tok == token.ELLIPSIS { len = &ast.Ellipsis{p.pos}; p.next(); } else if p.tok != token.RBRACK { - len = p.parseExpression(1); + len = p.parseExpression(); } p.expect(token.RBRACK); elt := p.parseType(); - return &ast.ArrayType{lbrack, len, elt}; + if len != nil { + return &ast.ArrayType{lbrack, len, elt}; + } + + return &ast.SliceType{lbrack, elt}; } -func (p *parser) parseChannelType() *ast.ChannelType { +func (p *parser) makeIdentList(list *vector.Vector) []*ast.Ident { + idents := make([]*ast.Ident, list.Len()); + for i := 0; i < list.Len(); i++ { + ident, is_ident := list.At(i).(*ast.Ident); + if !is_ident { + pos := list.At(i).(ast.Expr).Pos(); + p.error_expected(pos, "identifier"); + idents[i] = &ast.Ident{pos, []byte{}}; + } + idents[i] = ident; + } + return idents; +} + + +func (p *parser) parseFieldDecl() *ast.Field { if p.trace { - defer un(trace(p, "ChannelType")); + defer un(trace(p, "FieldDecl")); } - pos := p.pos; - dir := ast.SEND | ast.RECV; - if p.tok == token.CHAN { - p.next(); - if p.tok == token.ARROW { + doc := p.getDoc(); + + // a list of identifiers looks like a list of type names + list := vector.New(0); + for { + // TODO do not allow ()'s here + list.Push(p.parseType()); + if p.tok == token.COMMA { p.next(); - dir = ast.SEND; + } else { + break; } + } + + // if we had a list of identifiers, it must be followed by a type + typ := p.tryType(); + + // optional tag + var tag []*ast.StringLit; + if p.tok == token.STRING { + tag = p.parseStringList(nil); + } + + // analyze case + var idents []*ast.Ident; + if typ != nil { + // IdentifierList Type + idents = p.makeIdentList(list); } else { - p.expect(token.ARROW); - p.expect(token.CHAN); - dir = ast.RECV; + // Type (anonymous field) + if list.Len() == 1 { + // TODO check that this looks like a type + typ = list.At(0).(ast.Expr); + } else { + p.error_expected(p.pos, "anonymous field"); + typ = &ast.BadExpr{p.pos}; + } } - value := p.parseType(); - return 
&ast.ChannelType{pos, dir, value}; + return &ast.Field{doc, idents, typ, tag}; } -func (p *parser) tryParameterType() ast.Expr { - if p.tok == token.ELLIPSIS { - x := &ast.Ellipsis{p.pos}; +func (p *parser) parseStructType() *ast.StructType { + if p.trace { + defer un(trace(p, "StructType")); + } + + pos := p.expect(token.STRUCT); + var lbrace, rbrace token.Position; + var fields []*ast.Field; + if p.tok == token.LBRACE { + lbrace = p.pos; p.next(); - return x; + + list := vector.New(0); + for p.tok != token.RBRACE && p.tok != token.EOF { + list.Push(p.parseFieldDecl()); + if p.tok == token.SEMICOLON { + p.next(); + } else { + break; + } + } + if p.tok == token.SEMICOLON { + p.next(); + } + + rbrace = p.expect(token.RBRACE); + p.opt_semi = true; + + // convert vector + fields = make([]*ast.Field, list.Len()); + for i := list.Len() - 1; i >= 0; i-- { + fields[i] = list.At(i).(*ast.Field); + } + } + + return &ast.StructType{pos, lbrace, fields, rbrace}; +} + + +func (p *parser) parsePointerType() *ast.StarExpr { + if p.trace { + defer un(trace(p, "PointerType")); + } + + star := p.expect(token.MUL); + base := p.parseType(); + + return &ast.StarExpr{star, base}; +} + + +func (p *parser) tryParameterType(ellipsis_ok bool) ast.Expr { + if ellipsis_ok && p.tok == token.ELLIPSIS { + pos := p.pos; + p.next(); + if p.tok != token.RPAREN { + // "..." 
always must be at the very end of a parameter list + p.error(pos, "expected type, found '...'"); + } + return &ast.Ellipsis{pos}; } return p.tryType(); } -func (p *parser) parseParameterType() ast.Expr { - typ := p.tryParameterType(); +func (p *parser) parseParameterType(ellipsis_ok bool) ast.Expr { + typ := p.tryParameterType(ellipsis_ok); if typ == nil { - p.error(p.pos, "type expected"); + p.error_expected(p.pos, "type"); typ = &ast.BadExpr{p.pos}; } - return typ; } @@ -419,7 +510,7 @@ func (p *parser) parseParameterDecl(ellipsis_ok bool) (*vector.Vector, ast.Expr) list := vector.New(0); for { // TODO do not allow ()'s here - list.Push(p.parseParameterType()); + list.Push(p.parseParameterType(ellipsis_ok)); if p.tok == token.COMMA { p.next(); } else { @@ -428,7 +519,7 @@ func (p *parser) parseParameterDecl(ellipsis_ok bool) (*vector.Vector, ast.Expr) } // if we had a list of identifiers, it must be followed by a type - typ := p.tryParameterType(); + typ := p.tryParameterType(ellipsis_ok); return list, typ; } @@ -439,26 +530,22 @@ func (p *parser) parseParameterList(ellipsis_ok bool) []*ast.Field { defer un(trace(p, "ParameterList")); } - list, typ := p.parseParameterDecl(false); + list, typ := p.parseParameterDecl(ellipsis_ok); if typ != nil { // IdentifierList Type - // convert list of identifiers into []*Ident - idents := make([]*ast.Ident, list.Len()); - for i := 0; i < list.Len(); i++ { - idents[i] = list.At(i).(*ast.Ident); - } + idents := p.makeIdentList(list); list.Init(0); list.Push(&ast.Field{nil, idents, typ, nil}); for p.tok == token.COMMA { p.next(); idents := p.parseIdentList(nil); - typ := p.parseParameterType(); + typ := p.parseParameterType(ellipsis_ok); list.Push(&ast.Field{nil, idents, typ, nil}); } } else { - // Type { "," Type } + // Type { "," Type } (anonymous parameters) // convert list of types into list of *Param for i := 0; i < list.Len(); i++ { list.Set(i, &ast.Field{nil, nil, list.At(i).(ast.Expr), nil}); @@ -475,7 +562,6 @@ func (p 
*parser) parseParameterList(ellipsis_ok bool) []*ast.Field { } -// TODO make sure Go spec is updated func (p *parser) parseParameters(ellipsis_ok bool) []*ast.Field { if p.trace { defer un(trace(p, "Parameters")); @@ -512,18 +598,12 @@ func (p *parser) parseResult() []*ast.Field { } -// Function types -// -// (params) -// (params) type -// (params) (results) - func (p *parser) parseSignature() (params []*ast.Field, results []*ast.Field) { if p.trace { defer un(trace(p, "Signature")); } - params = p.parseParameters(true); // TODO find better solution + params = p.parseParameters(true); results = p.parseResult(); return params, results; @@ -552,10 +632,10 @@ func (p *parser) parseMethodSpec() *ast.Field { var typ ast.Expr; x := p.parseQualifiedIdent(); if tmp, is_ident := x.(*ast.Ident); is_ident && (p.tok == token.COMMA || p.tok == token.LPAREN) { - // method(s) + // methods idents = p.parseIdentList(x); params, results := p.parseSignature(); - typ = &ast.FunctionType{nopos, params, results}; + typ = &ast.FunctionType{noPos, params, results}; } else { // embedded interface typ = x; @@ -614,133 +694,46 @@ func (p *parser) parseMapType() *ast.MapType { } -func (p *parser) parseStringList(x *ast.StringLit) []*ast.StringLit - -func (p *parser) parseFieldDecl() *ast.Field { - if p.trace { - defer un(trace(p, "FieldDecl")); - } - - doc := p.getDoc(); - - // a list of identifiers looks like a list of type names - list := vector.New(0); - for { - // TODO do not allow ()'s here - list.Push(p.parseType()); - if p.tok == token.COMMA { - p.next(); - } else { - break; - } - } - - // if we had a list of identifiers, it must be followed by a type - typ := p.tryType(); - - // optional tag - var tag []*ast.StringLit; - if p.tok == token.STRING { - tag = p.parseStringList(nil); - } - - // analyze case - var idents []*ast.Ident; - if typ != nil { - // non-empty identifier list followed by a type - idents = make([]*ast.Ident, list.Len()); - for i := 0; i < list.Len(); i++ { - if ident, 
is_ident := list.At(i).(*ast.Ident); is_ident { - idents[i] = ident; - } else { - p.error(list.At(i).(ast.Expr).Pos(), "identifier expected"); - } - } - } else { - // anonymous field - if list.Len() == 1 { - // TODO should do more checks here - typ = list.At(0).(ast.Expr); - } else { - p.error(p.pos, "anonymous field expected"); - } - } - - return &ast.Field{doc, idents, typ, tag}; -} - - -func (p *parser) parseStructType() *ast.StructType { +func (p *parser) parseChannelType() *ast.ChannelType { if p.trace { - defer un(trace(p, "StructType")); + defer un(trace(p, "ChannelType")); } - pos := p.expect(token.STRUCT); - var lbrace, rbrace token.Position; - var fields []*ast.Field; - if p.tok == token.LBRACE { - lbrace = p.pos; + pos := p.pos; + dir := ast.SEND | ast.RECV; + if p.tok == token.CHAN { p.next(); - - list := vector.New(0); - for p.tok != token.RBRACE && p.tok != token.EOF { - list.Push(p.parseFieldDecl()); - if p.tok == token.SEMICOLON { - p.next(); - } else { - break; - } - } - if p.tok == token.SEMICOLON { + if p.tok == token.ARROW { p.next(); + dir = ast.SEND; } - - rbrace = p.expect(token.RBRACE); - p.opt_semi = true; - - // convert vector - fields = make([]*ast.Field, list.Len()); - for i := list.Len() - 1; i >= 0; i-- { - fields[i] = list.At(i).(*ast.Field); - } - } - - return &ast.StructType{pos, lbrace, fields, rbrace}; -} - - -func (p *parser) parsePointerType() *ast.StarExpr { - if p.trace { - defer un(trace(p, "PointerType")); + } else { + p.expect(token.ARROW); + p.expect(token.CHAN); + dir = ast.RECV; } + value := p.parseType(); - star := p.expect(token.MUL); - base := p.parseType(); - - return &ast.StarExpr{star, base}; + return &ast.ChannelType{pos, dir, value}; } -func (p *parser) tryType() ast.Expr { - if p.trace { - defer un(trace(p, "Type (try)")); - } - +func (p *parser) tryRawType(ellipsis_ok bool) ast.Expr { switch p.tok { case token.IDENT: return p.parseTypeName(); - case token.LBRACK: return p.parseArrayType(); - case token.CHAN, 
token.ARROW: return p.parseChannelType(); - case token.INTERFACE: return p.parseInterfaceType(); - case token.FUNC: return p.parseFunctionType(); - case token.MAP: return p.parseMapType(); + case token.LBRACK: return p.parseArrayOrSliceType(ellipsis_ok); case token.STRUCT: return p.parseStructType(); case token.MUL: return p.parsePointerType(); + case token.FUNC: return p.parseFunctionType(); + case token.INTERFACE: return p.parseInterfaceType(); + case token.MAP: return p.parseMapType(); + case token.CHAN, token.ARROW: return p.parseChannelType(); case token.LPAREN: lparen := p.pos; p.next(); - x := p.parseType(); + typ := p.parseType(); rparen := p.expect(token.RPAREN); - return &ast.ParenExpr{lparen, x, rparen}; + return &ast.ParenExpr{lparen, typ, rparen}; } // no type found @@ -748,10 +741,15 @@ func (p *parser) tryType() ast.Expr { } +func (p *parser) tryType() ast.Expr { + return p.tryRawType(false); +} + + // ---------------------------------------------------------------------------- // Blocks -func asStmtList(list *vector.Vector) []ast.Stmt { +func makeStmtList(list *vector.Vector) []ast.Stmt { stats := make([]ast.Stmt, list.Len()); for i := 0; i < list.Len(); i++ { stats[i] = list.At(i).(ast.Stmt); @@ -782,13 +780,13 @@ func (p *parser) parseStatementList() []ast.Stmt { } } - return asStmtList(list); + return makeStmtList(list); } func (p *parser) parseBlockStmt() *ast.BlockStmt { if p.trace { - defer un(trace(p, "compositeStmt")); + defer un(trace(p, "BlockStmt")); } lbrace := p.expect(token.LBRACE); @@ -803,20 +801,6 @@ func (p *parser) parseBlockStmt() *ast.BlockStmt { // ---------------------------------------------------------------------------- // Expressions -func (p *parser) parseFunctionLit() ast.Expr { - if p.trace { - defer un(trace(p, "FunctionLit")); - } - - typ := p.parseFunctionType(); - p.expr_lev++; - body := p.parseBlockStmt(); - p.expr_lev--; - - return &ast.FunctionLit{typ, body}; -} - - func (p *parser) parseStringList(x 
*ast.StringLit) []*ast.StringLit { if p.trace { defer un(trace(p, "StringList")); @@ -842,6 +826,23 @@ func (p *parser) parseStringList(x *ast.StringLit) []*ast.StringLit { } +func (p *parser) parseFunctionLit() ast.Expr { + if p.trace { + defer un(trace(p, "FunctionLit")); + } + + typ := p.parseFunctionType(); + p.expr_lev++; + body := p.parseBlockStmt(); + p.expr_lev--; + + return &ast.FunctionLit{typ, body}; +} + + +// parseOperand may return an expression or a raw type (incl. array +// types of the form [...]T). Callers must verify the result. +// func (p *parser) parseOperand() ast.Expr { if p.trace { defer un(trace(p, "Operand")); @@ -878,7 +879,7 @@ func (p *parser) parseOperand() ast.Expr { lparen := p.pos; p.next(); p.expr_lev++; - x := p.parseExpression(1); + x := p.parseExpression(); p.expr_lev--; rparen := p.expect(token.RPAREN); return &ast.ParenExpr{lparen, x, rparen}; @@ -887,15 +888,14 @@ func (p *parser) parseOperand() ast.Expr { return p.parseFunctionLit(); default: - t := p.tryType(); + t := p.tryRawType(true); // could be type for composite literal if t != nil { return t; - } else { - p.error(p.pos, "operand expected"); - p.next(); // make progress } } + p.error_expected(p.pos, "operand"); + p.next(); // make progress return &ast.BadExpr{p.pos}; } @@ -910,24 +910,21 @@ func (p *parser) parseSelectorOrTypeAssertion(x ast.Expr) ast.Expr { // selector sel := p.parseIdent(); return &ast.SelectorExpr{x, sel}; - + } + + // type assertion + p.expect(token.LPAREN); + var typ ast.Expr; + if p.tok == token.TYPE { + // special case for type switch + typ = &ast.Ident{p.pos, p.lit}; + p.next(); } else { - // type assertion - p.expect(token.LPAREN); - var typ ast.Expr; - if p.tok == token.TYPE { - // special case for type switch syntax - typ = &ast.Ident{p.pos, p.lit}; - p.next(); - } else { - typ = p.parseType(); - } - p.expect(token.RPAREN); - return &ast.TypeAssertExpr{x, typ}; + typ = p.parseType(); } + p.expect(token.RPAREN); - unreachable(); - return 
nil; + return &ast.TypeAssertExpr{x, typ}; } @@ -938,28 +935,26 @@ func (p *parser) parseIndexOrSlice(x ast.Expr) ast.Expr { p.expect(token.LBRACK); p.expr_lev++; - index := p.parseExpression(1); - p.expr_lev--; - - if p.tok == token.RBRACK { - // index + begin := p.parseExpression(); + var end ast.Expr; + if p.tok == token.COLON { p.next(); - return &ast.IndexExpr{x, index}; + end = p.parseExpression(); } - - // slice - p.expect(token.COLON); - p.expr_lev++; - end := p.parseExpression(1); p.expr_lev--; p.expect(token.RBRACK); - return &ast.SliceExpr{x, index, end}; + + if end != nil { + return &ast.SliceExpr{x, begin, end}; + } + + return &ast.IndexExpr{x, begin}; } -func (p *parser) parseCall(fun ast.Expr) *ast.CallExpr { +func (p *parser) parseCallOrConversion(fun ast.Expr) *ast.CallExpr { if p.trace { - defer un(trace(p, "Call")); + defer un(trace(p, "CallOrConversion")); } lparen := p.expect(token.LPAREN); @@ -968,34 +963,51 @@ func (p *parser) parseCall(fun ast.Expr) *ast.CallExpr { args = p.parseExpressionList(); } rparen := p.expect(token.RPAREN); + return &ast.CallExpr{fun, lparen, args, rparen}; } -func (p *parser) parseElementList() []ast.Expr { +func (p *parser) parseKeyValueExpr() ast.Expr { + if p.trace { + defer un(trace(p, "KeyValueExpr")); + } + + key := p.parseExpression(); + + if p.tok == token.COLON { + colon := p.pos; + p.next(); + value := p.parseExpression(); + return &ast.KeyValueExpr{key, colon, value}; + } + + return key; +} + + +func isPair(x ast.Expr) bool { + tmp, is_pair := x.(*ast.KeyValueExpr); + return is_pair; +} + + +func (p *parser) parseExpressionOrKeyValueList() []ast.Expr { if p.trace { - defer un(trace(p, "ElementList")); + defer un(trace(p, "ExpressionOrKeyValueList")); } + var pairs bool; list := vector.New(0); - singles := true; - for p.tok != token.RBRACE { - x := p.parseExpression(0); + for p.tok != token.RBRACE && p.tok != token.EOF { + x := p.parseKeyValueExpr(); + if list.Len() == 0 { - // first element determines 
syntax for remaining elements - if t, is_binary := x.(*ast.BinaryExpr); is_binary && t.Op == token.COLON { - singles = false; - } + pairs = isPair(x); } else { // not the first element - check syntax - if singles { - if t, is_binary := x.(*ast.BinaryExpr); is_binary && t.Op == token.COLON { - p.error(t.X.Pos(), "single value expected; found pair"); - } - } else { - if t, is_binary := x.(*ast.BinaryExpr); !is_binary || t.Op != token.COLON { - p.error(x.Pos(), "key:value pair expected; found single value"); - } + if pairs != isPair(x) { + p.error_expected(x.Pos(), "all single expressions or all key-value pairs"); } } @@ -1026,13 +1038,91 @@ func (p *parser) parseCompositeLit(typ ast.Expr) ast.Expr { lbrace := p.expect(token.LBRACE); var elts []ast.Expr; if p.tok != token.RBRACE { - elts = p.parseElementList(); + elts = p.parseExpressionOrKeyValueList(); } rbrace := p.expect(token.RBRACE); return &ast.CompositeLit{typ, lbrace, elts, rbrace}; } +// TODO apply these make functions more thoroughly +// (all uses of parseExpression) + +// makeExpr makes sure x is an expression and not a type. 
+func (p *parser) makeExpr(x ast.Expr) ast.Expr { + // TODO should provide predicate in AST nodes + switch t := x.(type) { + case *ast.BadExpr: return x; + case *ast.Ident: return x; + case *ast.IntLit: return x; + case *ast.FloatLit: return x; + case *ast.CharLit: return x; + case *ast.StringLit: return x; + case *ast.StringList: return x; + case *ast.FunctionLit: return x; + case *ast.CompositeLit: return x; + case *ast.ParenExpr: return p.makeExpr(t.X); + case *ast.SelectorExpr: return x; + case *ast.IndexExpr: return x; + case *ast.SliceExpr: return x; + case *ast.TypeAssertExpr: return x; + case *ast.CallExpr: return x; + case *ast.StarExpr: return x; + case *ast.UnaryExpr: return x; + case *ast.BinaryExpr: return x; + } + + // all other nodes are not proper expressions + p.error_expected(x.Pos(), "expression"); + panic(); + return &ast.BadExpr{x.Pos()}; +} + + +// makeType makes sure x is a type and not an expression. +func (p *parser) makeType(x ast.Expr) ast.Expr { + // TODO should provide predicate in AST nodes + switch t := x.(type) { + case *ast.BadExpr: return x; + case *ast.Ident: return x; + case *ast.ParenExpr: return p.makeType(t.X); + case *ast.ArrayType: + if len, is_ellipsis := t.Len.(*ast.Ellipsis); is_ellipsis { + p.error(len.Pos(), "expected array length, found '...'"); + return &ast.BadExpr{x.Pos()}; + } + return x; + case *ast.SliceType: return x; + case *ast.StructType: return x; + case *ast.FunctionType: return x; + case *ast.InterfaceType: return x; + case *ast.MapType: return x; + case *ast.ChannelType: return x; + } + + // all other nodes are not types + p.error_expected(x.Pos(), "type"); + return &ast.BadExpr{x.Pos()}; +} + + +// makeExprOrType makes sure that x is an expression or a type +// (and not a raw type such as [...]T). 
+// +func (p *parser) makeExprOrType(x ast.Expr) ast.Expr { + // TODO should provide predicate in AST nodes + if t, is_array := x.(*ast.ArrayType); is_array { + if len, is_ellipsis := t.Len.(*ast.Ellipsis); is_ellipsis { + p.error(len.Pos(), "expected array length, found '...'"); + return &ast.BadExpr{x.Pos()}; + } + } + + // all other nodes are expressions or types + return x; +} + + func (p *parser) parsePrimaryExpr() ast.Expr { if p.trace { defer un(trace(p, "PrimaryExpr")); @@ -1041,21 +1131,21 @@ func (p *parser) parsePrimaryExpr() ast.Expr { x := p.parseOperand(); for { switch p.tok { - case token.PERIOD: x = p.parseSelectorOrTypeAssertion(x); - case token.LBRACK: x = p.parseIndexOrSlice(x); - case token.LPAREN: x = p.parseCall(x); + case token.PERIOD: x = p.parseSelectorOrTypeAssertion(p.makeExpr(x)); + case token.LBRACK: x = p.parseIndexOrSlice(p.makeExpr(x)); + case token.LPAREN: x = p.parseCallOrConversion(p.makeExprOrType(x)); case token.LBRACE: if p.expr_lev >= 0 { x = p.parseCompositeLit(x); } else { - return x; + return p.makeExprOrType(x); } default: - return x; + return p.makeExprOrType(x); } } - unreachable(); + panic(); // unreachable return nil; } @@ -1067,17 +1157,17 @@ func (p *parser) parseUnaryExpr() ast.Expr { switch p.tok { case token.ADD, token.SUB, token.NOT, token.XOR, token.ARROW, token.AND, token.RANGE: - pos, tok := p.pos, p.tok; + pos, op := p.pos, p.tok; p.next(); x := p.parseUnaryExpr(); - return &ast.UnaryExpr{pos, tok, x}; + return &ast.UnaryExpr{pos, op, p.makeExpr(x)}; case token.MUL: // unary "*" expression or pointer type pos := p.pos; p.next(); x := p.parseUnaryExpr(); - return &ast.StarExpr{pos, x}; + return &ast.StarExpr{pos, p.makeExprOrType(x)}; } return p.parsePrimaryExpr(); @@ -1092,10 +1182,10 @@ func (p *parser) parseBinaryExpr(prec1 int) ast.Expr { x := p.parseUnaryExpr(); for prec := p.tok.Precedence(); prec >= prec1; prec-- { for p.tok.Precedence() == prec { - pos, tok := p.pos, p.tok; + pos, op := p.pos, p.tok; 
p.next(); y := p.parseBinaryExpr(prec + 1); - x = &ast.BinaryExpr{x, pos, tok, y}; + x = &ast.BinaryExpr{p.makeExpr(x), pos, op, p.makeExpr(y)}; } } @@ -1103,16 +1193,12 @@ func (p *parser) parseBinaryExpr(prec1 int) ast.Expr { } -func (p *parser) parseExpression(prec int) ast.Expr { +func (p *parser) parseExpression() ast.Expr { if p.trace { defer un(trace(p, "Expression")); } - if prec < 0 { - panic("precedence must be >= 0"); - } - - return p.parseBinaryExpr(prec); + return p.parseBinaryExpr(token.LowestPrec + 1); } @@ -1172,11 +1258,11 @@ func (p *parser) parseSimpleStmt() ast.Stmt { func (p *parser) parseCallExpr() *ast.CallExpr { - x := p.parseExpression(1); + x := p.parseExpression(); if call, is_call := x.(*ast.CallExpr); is_call { return call; } - p.error(x.Pos(), "expected function/method call"); + p.error_expected(x.Pos(), "function/method call"); return nil; } @@ -1256,16 +1342,12 @@ func (p *parser) asExpr(s ast.Stmt) ast.Expr { if es, is_expr := s.(*ast.ExprStmt); is_expr { return es.X; } - p.error(s.Pos(), "condition expected; found simple statement"); + p.error(s.Pos(), "expected condition, found simple statement"); return &ast.BadExpr{s.Pos()}; } func (p *parser) parseControlClause(isForStmt bool) (s1, s2, s3 ast.Stmt) { - if p.trace { - defer un(trace(p, "ControlClause")); - } - if p.tok != token.LBRACE { prev_lev := p.expr_lev; p.expr_lev = -1; @@ -1375,25 +1457,21 @@ func (p *parser) parseSwitchStmt() ast.Stmt { } rbrace := p.expect(token.RBRACE); p.opt_semi = true; - body := &ast.BlockStmt{lbrace, asStmtList(cases), rbrace}; + body := &ast.BlockStmt{lbrace, makeStmtList(cases), rbrace}; return &ast.SwitchStmt{pos, s1, p.asExpr(s2), body}; - - } else { - // type switch - // TODO do all the checks! 
- lbrace := p.expect(token.LBRACE); - cases := vector.New(0); - for p.tok == token.CASE || p.tok == token.DEFAULT { - cases.Push(p.parseTypeCaseClause()); - } - rbrace := p.expect(token.RBRACE); - p.opt_semi = true; - body := &ast.BlockStmt{lbrace, asStmtList(cases), rbrace}; - return &ast.TypeSwitchStmt{pos, s1, s2, body}; } - unreachable(); - return nil; + // type switch + // TODO do all the checks! + lbrace := p.expect(token.LBRACE); + cases := vector.New(0); + for p.tok == token.CASE || p.tok == token.DEFAULT { + cases.Push(p.parseTypeCaseClause()); + } + rbrace := p.expect(token.RBRACE); + p.opt_semi = true; + body := &ast.BlockStmt{lbrace, makeStmtList(cases), rbrace}; + return &ast.TypeSwitchStmt{pos, s1, s2, body}; } @@ -1410,17 +1488,17 @@ func (p *parser) parseCommClause() *ast.CommClause { p.next(); if p.tok == token.ARROW { // RecvExpr without assignment - rhs = p.parseExpression(1); + rhs = p.parseExpression(); } else { // SendExpr or RecvExpr - rhs = p.parseExpression(1); + rhs = p.parseExpression(); if p.tok == token.ASSIGN || p.tok == token.DEFINE { // RecvExpr with assignment tok = p.tok; p.next(); lhs = rhs; if p.tok == token.ARROW { - rhs = p.parseExpression(1); + rhs = p.parseExpression(); } else { p.expect(token.ARROW); // use expect() error handling } @@ -1451,7 +1529,7 @@ func (p *parser) parseSelectStmt() *ast.SelectStmt { } rbrace := p.expect(token.RBRACE); p.opt_semi = true; - body := &ast.BlockStmt{lbrace, asStmtList(cases), rbrace}; + body := &ast.BlockStmt{lbrace, makeStmtList(cases), rbrace}; return &ast.SelectStmt{pos, body}; } @@ -1469,7 +1547,7 @@ func (p *parser) parseForStmt() ast.Stmt { if as, is_as := s2.(*ast.AssignStmt); is_as { // possibly a for statement with a range clause; check assignment operator if as.Tok != token.ASSIGN && as.Tok != token.DEFINE { - p.error(as.TokPos, "'=' or ':=' expected"); + p.error_expected(as.TokPos, "'=' or ':='"); return &ast.BadStmt{pos}; } // check lhs @@ -1481,19 +1559,19 @@ func (p *parser) 
parseForStmt() ast.Stmt { case 1: key = as.Lhs[0]; default: - p.error(as.Lhs[0].Pos(), "expected 1 or 2 expressions"); + p.error_expected(as.Lhs[0].Pos(), "1 or 2 expressions"); return &ast.BadStmt{pos}; } // check rhs if len(as.Rhs) != 1 { - p.error(as.Rhs[0].Pos(), "expected 1 expressions"); + p.error_expected(as.Rhs[0].Pos(), "1 expressions"); return &ast.BadStmt{pos}; } if rhs, is_unary := as.Rhs[0].(*ast.UnaryExpr); is_unary && rhs.Op == token.RANGE { // rhs is range expression; check lhs return &ast.RangeStmt{pos, key, value, as.TokPos, as.Tok, rhs.X, body} } else { - p.error(s2.Pos(), "range clause expected"); + p.error_expected(s2.Pos(), "range clause"); return &ast.BadStmt{pos}; } } else { @@ -1501,7 +1579,7 @@ func (p *parser) parseForStmt() ast.Stmt { return &ast.ForStmt{pos, s1, p.asExpr(s2), s3, body}; } - unreachable(); + panic(); // unreachable return nil; } @@ -1532,19 +1610,19 @@ func (p *parser) parseStatement() ast.Stmt { return p.parseBlockStmt(); case token.IF: return p.parseIfStmt(); - case token.FOR: - return p.parseForStmt(); case token.SWITCH: return p.parseSwitchStmt(); case token.SELECT: return p.parseSelectStmt(); + case token.FOR: + return p.parseForStmt(); case token.SEMICOLON, token.RBRACE: // don't consume the ";", it is the separator following the empty statement return &ast.EmptyStmt{p.pos}; } // no statement found - p.error(p.pos, "statement expected"); + p.error_expected(p.pos, "statement"); return &ast.BadStmt{p.pos}; } @@ -1581,7 +1659,7 @@ func (p *parser) parseConstSpec(pos token.Position, doc ast.Comments) *ast.Const defer un(trace(p, "ConstSpec")); } - names := p.parseIdentList(nil); + idents := p.parseIdentList(nil); typ := p.tryType(); var values []ast.Expr; if typ != nil || p.tok == token.ASSIGN { @@ -1589,7 +1667,7 @@ func (p *parser) parseConstSpec(pos token.Position, doc ast.Comments) *ast.Const values = p.parseExpressionList(); } - return &ast.ConstDecl{doc, pos, names, typ, values}; + return &ast.ConstDecl{doc, pos, 
idents, typ, values}; } @@ -1610,7 +1688,7 @@ func (p *parser) parseVarSpec(pos token.Position, doc ast.Comments) *ast.VarDecl defer un(trace(p, "VarSpec")); } - names := p.parseIdentList(nil); + idents := p.parseIdentList(nil); typ := p.tryType(); var values []ast.Expr; if typ == nil || p.tok == token.ASSIGN { @@ -1618,7 +1696,7 @@ func (p *parser) parseVarSpec(pos token.Position, doc ast.Comments) *ast.VarDecl values = p.parseExpressionList(); } - return &ast.VarDecl{doc, pos, names, typ, values}; + return &ast.VarDecl{doc, pos, idents, typ, values}; } @@ -1630,7 +1708,7 @@ func (p *parser) parseSpec(pos token.Position, doc ast.Comments, keyword int) as case token.VAR: return p.parseVarSpec(pos, doc); } - unreachable(); + panic(); // unreachable return nil; } @@ -1647,7 +1725,7 @@ func (p *parser) parseDecl(keyword int) ast.Decl { p.next(); list := vector.New(0); for p.tok != token.RPAREN && p.tok != token.EOF { - list.Push(p.parseSpec(nopos, nil, keyword)); + list.Push(p.parseSpec(noPos, nil, keyword)); if p.tok == token.SEMICOLON { p.next(); } else { @@ -1670,15 +1748,6 @@ func (p *parser) parseDecl(keyword int) ast.Decl { } -// Function and method declarations -// -// func ident (params) -// func ident (params) type -// func ident (params) (results) -// func (recv) ident (params) -// func (recv) ident (params) type -// func (recv) ident (params) (results) - func (p *parser) parseFunctionDecl() *ast.FuncDecl { if p.trace { defer un(trace(p, "FunctionDecl")); @@ -1690,11 +1759,11 @@ func (p *parser) parseFunctionDecl() *ast.FuncDecl { var recv *ast.Field; if p.tok == token.LPAREN { pos := p.pos; - tmp := p.parseParameters(true); + tmp := p.parseParameters(false); if len(tmp) == 1 { recv = tmp[0]; } else { - p.error(pos, "must have exactly one receiver"); + p.error_expected(pos, "exactly one receiver"); } } @@ -1723,7 +1792,7 @@ func (p *parser) parseDeclaration() ast.Decl { } pos := p.pos; - p.error(pos, "declaration expected"); + p.error_expected(pos, 
"declaration"); p.next(); // make progress return &ast.BadDecl{pos}; } @@ -1732,16 +1801,18 @@ func (p *parser) parseDeclaration() ast.Decl { // ---------------------------------------------------------------------------- // Packages -// The Mode constants control how much of the source text is parsed. -type Mode int; +// A set of flags (or 0) must be provided via the mode parameter to +// the Parse function. They control the amount of source code parsed +// and other optional parser functionality. +// const ( - ParseEntirePackage Mode = iota; - ParseImportDeclsOnly; - ParsePackageClauseOnly; + PackageClauseOnly = 1 << iota; // parsing stops after package clause + ImportsOnly; // parsing stops after import declarations + Trace; // print a trace of parsed productions ) -func (p *parser) parsePackage(mode Mode) *ast.Package { +func (p *parser) parsePackage() *ast.Package { if p.trace { defer un(trace(p, "Program")); } @@ -1749,16 +1820,15 @@ func (p *parser) parsePackage(mode Mode) *ast.Package { // package clause comment := p.getDoc(); pos := p.expect(token.PACKAGE); - name := p.parseIdent(); + ident := p.parseIdent(); if p.tok == token.SEMICOLON { // common error p.error(p.pos, "extra semicolon"); p.next(); } - - + var decls []ast.Decl; - if mode <= ParseImportDeclsOnly { + if p.mode & PackageClauseOnly == 0 { // import decls list := vector.New(0); for p.tok == token.IMPORT { @@ -1768,7 +1838,7 @@ func (p *parser) parsePackage(mode Mode) *ast.Package { } } - if mode <= ParseEntirePackage { + if p.mode & ImportsOnly == 0 { // rest of package body for p.tok != token.EOF { list.Push(p.parseDeclaration()); @@ -1791,7 +1861,7 @@ func (p *parser) parsePackage(mode Mode) *ast.Package { comments[i] = p.comments.At(i).(*ast.Comment); } - return &ast.Package{comment, pos, name, decls, comments}; + return &ast.Package{comment, pos, ident, decls, comments}; } @@ -1809,23 +1879,25 @@ func (p *parser) parsePackage(mode Mode) *ast.Package { // be constructed partially, with 
ast.BadX nodes representing the fragments // of source code that contained syntax errors. // -// The amount of source text parsed can be controlled with the mode parameter. -// The flags parameter controls optional parser functionality such as tracing. +// The mode parameter controls the amount of source text parsed and other +// optional parser functionality. // // (*) Note that a scanner may find lexical syntax errors but still return // a legal token sequence. To be sure there are no syntax errors in the // source (and not just the token sequence corresponding to the source) // both the parser and scanner error count must be 0. // -func Parse(scanner Scanner, err ErrorHandler, mode Mode, flags uint) (*ast.Package, int) { +func Parse(scanner Scanner, err ErrorHandler, mode uint) (*ast.Package, int) { // initialize parser state var p parser; p.scanner = scanner; p.err = err; - p.trace = flags & Trace != 0; + p.mode = mode; + p.trace = mode & Trace != 0; // for convenience (p.trace is used frequently) p.comments.Init(0); p.next(); // parse program - return p.parsePackage(mode), p.errorCount; + pak := p.parsePackage(); + return pak, p.errorCount; } diff --git a/usr/gri/pretty/printer.go b/usr/gri/pretty/printer.go index 024e44d54e..5595a2b837 100644 --- a/usr/gri/pretty/printer.go +++ b/usr/gri/pretty/printer.go @@ -591,6 +591,15 @@ func (P *Printer) DoBinaryExpr(x *ast.BinaryExpr) { } +func (P *Printer) DoKeyValueExpr(x *ast.KeyValueExpr) { + P.Expr(x.Key); + P.separator = blank; + P.Token(x.Colon, token.COLON); + P.separator = blank; + P.Expr(x.Value); +} + + func (P *Printer) DoStarExpr(x *ast.StarExpr) { P.Token(x.Pos(), token.MUL); P.Expr(x.X); @@ -721,9 +730,14 @@ func (P *Printer) DoEllipsis(x *ast.Ellipsis) { func (P *Printer) DoArrayType(x *ast.ArrayType) { P.Token(x.Pos(), token.LBRACK); - if x.Len != nil { - P.Expr(x.Len); - } + P.Expr(x.Len); + P.Token(nopos, token.RBRACK); + P.Expr(x.Elt); +} + + +func (P *Printer) DoSliceType(x *ast.SliceType) { + 
P.Token(x.Pos(), token.LBRACK); P.Token(nopos, token.RBRACK); P.Expr(x.Elt); } @@ -751,11 +765,6 @@ func (P *Printer) DoInterfaceType(x *ast.InterfaceType) { } -func (P *Printer) DoSliceType(x *ast.SliceType) { - unimplemented(); -} - - func (P *Printer) DoMapType(x *ast.MapType) { P.Token(x.Pos(), token.MAP); P.separator = blank;