Cypherpunks repositories - gostls13.git/commitdiff
implemented InsertSemis mode for go/scanner
author Robert Griesemer <gri@golang.org>
Thu, 10 Dec 2009 23:31:02 +0000 (15:31 -0800)
committer Robert Griesemer <gri@golang.org>
Thu, 10 Dec 2009 23:31:02 +0000 (15:31 -0800)
R=rsc
https://golang.org/cl/175047

src/pkg/go/scanner/scanner.go
src/pkg/go/scanner/scanner_test.go

index 177fe0f19a3ffe9b8b8472b44ec12ab9e9ae0567..386cdb0e9f0f8ade8711874916b8a254a1d66082 100644 (file)
@@ -29,9 +29,11 @@ type Scanner struct {
        mode    uint;           // scanning mode
 
        // scanning state
-       pos     token.Position; // previous reading position (position before ch)
-       offset  int;            // current reading offset (position after ch)
-       ch      int;            // one char look-ahead
+       pos             token.Position; // previous reading position (position before ch)
+       offset          int;            // current reading offset (position after ch)
+       ch              int;            // one char look-ahead
+       insertSemi      bool;           // insert a semicolon before next newline
+       pendingComment  token.Position; // valid if pendingComment.Line > 0
 
        // public state - ok to modify
        ErrorCount      int;    // number of errors encountered
@@ -69,6 +71,7 @@ func (S *Scanner) next() {
 const (
        ScanComments            = 1 << iota;    // return comments as COMMENT tokens
        AllowIllegalChars;      // do not report an error for illegal chars
+       InsertSemis;            // automatically insert semicolons
 )
 
 
@@ -420,6 +423,8 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke
 }
 
 
+var semicolon = []byte{';'}
+
 // Scan scans the next token and returns the token position pos,
 // the token tok, and the literal text lit corresponding to the
 // token. The source end is indicated by token.EOF.
@@ -432,40 +437,63 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke
 // of the error handler, if there was one installed.
 //
 func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) {
-scan_again:
+       if S.pendingComment.Line > 0 {
+               // "consume" pending comment
+               S.pos = S.pendingComment;
+               S.offset = S.pos.Offset + 1;
+               S.ch = '/';
+               S.pendingComment.Line = 0;
+       }
+
+scanAgain:
        // skip white space
-       for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' || S.ch == '\r' {
+       for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' && !S.insertSemi || S.ch == '\r' {
                S.next()
        }
 
        // current token start
+       insertSemi := false;
        pos, tok = S.pos, token.ILLEGAL;
 
        // determine token value
        switch ch := S.ch; {
        case isLetter(ch):
-               tok = S.scanIdentifier()
+               tok = S.scanIdentifier();
+               switch tok {
+               case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
+                       insertSemi = true
+               default:
+                       insertSemi = false
+               }
        case digitVal(ch) < 10:
-               tok = S.scanNumber(false)
+               insertSemi = true;
+               tok = S.scanNumber(false);
        default:
                S.next();       // always make progress
                switch ch {
                case -1:
                        tok = token.EOF
+               case '\n':
+                       S.insertSemi = false;
+                       return pos, token.SEMICOLON, semicolon;
                case '"':
+                       insertSemi = true;
                        tok = token.STRING;
                        S.scanString(pos);
                case '\'':
+                       insertSemi = true;
                        tok = token.CHAR;
                        S.scanChar(pos);
                case '`':
+                       insertSemi = true;
                        tok = token.STRING;
                        S.scanRawString(pos);
                case ':':
                        tok = S.switch2(token.COLON, token.DEFINE)
                case '.':
                        if digitVal(S.ch) < 10 {
-                               tok = S.scanNumber(true)
+                               insertSemi = true;
+                               tok = S.scanNumber(true);
                        } else if S.ch == '.' {
                                S.next();
                                if S.ch == '.' {
@@ -482,27 +510,57 @@ scan_again:
                case '(':
                        tok = token.LPAREN
                case ')':
-                       tok = token.RPAREN
+                       insertSemi = true;
+                       tok = token.RPAREN;
                case '[':
                        tok = token.LBRACK
                case ']':
-                       tok = token.RBRACK
+                       insertSemi = true;
+                       tok = token.RBRACK;
                case '{':
                        tok = token.LBRACE
                case '}':
-                       tok = token.RBRACE
+                       insertSemi = true;
+                       tok = token.RBRACE;
                case '+':
-                       tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC)
+                       tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC);
+                       if tok == token.INC {
+                               insertSemi = true
+                       }
                case '-':
-                       tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC)
+                       tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC);
+                       if tok == token.DEC {
+                               insertSemi = true
+                       }
                case '*':
                        tok = S.switch2(token.MUL, token.MUL_ASSIGN)
                case '/':
                        if S.ch == '/' || S.ch == '*' {
-                               S.scanComment(pos);
-                               tok = token.COMMENT;
-                               if S.mode&ScanComments == 0 {
-                                       goto scan_again
+                               // comment
+                               newline := false;
+                               if S.insertSemi {
+                                       if S.ch == '/' {
+                                               // a line comment acts like a newline
+                                               newline = true
+                                       } else {
+                                               // a general comment may act like a newline
+                                               S.scanComment(pos);
+                                               newline = pos.Line < S.pos.Line;
+                                       }
+                               } else {
+                                       S.scanComment(pos)
+                               }
+                               if newline {
+                                       // insert a semicolon and retain pending comment
+                                       S.insertSemi = false;
+                                       S.pendingComment = pos;
+                                       return pos, token.SEMICOLON, semicolon;
+                               } else if S.mode&ScanComments == 0 {
+                                       // skip comment
+                                       goto scanAgain
+                               } else {
+                                       insertSemi = S.insertSemi;      // preserve insertSemi info
+                                       tok = token.COMMENT;
                                }
                        } else {
                                tok = S.switch2(token.QUO, token.QUO_ASSIGN)
@@ -537,9 +595,13 @@ scan_again:
                        if S.mode&AllowIllegalChars == 0 {
                                S.error(pos, "illegal character "+charString(ch))
                        }
+                       insertSemi = S.insertSemi;      // preserve insertSemi info
                }
        }
 
+       if S.mode&InsertSemis != 0 {
+               S.insertSemi = insertSemi
+       }
        return pos, tok, S.src[pos.Offset:S.pos.Offset];
 }
 
index c133289268a749f761806a20734c77bcca7e8cb2..ddaaab27fd2d7d593a8a2c94a077c7d5ae3efcd7 100644 (file)
@@ -225,13 +225,13 @@ func TestScan(t *testing.T) {
                        }
                        checkPos(t, lit, pos, epos);
                        if tok != e.tok {
-                               t.Errorf("bad token for %s: got %s, expected %s", lit, tok.String(), e.tok.String())
+                               t.Errorf("bad token for %q: got %s, expected %s", lit, tok.String(), e.tok.String())
                        }
                        if e.tok.IsLiteral() && lit != e.lit {
-                               t.Errorf("bad literal for %s: got %s, expected %s", lit, lit, e.lit)
+                               t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, e.lit)
                        }
                        if tokenclass(tok) != e.class {
-                               t.Errorf("bad class for %s: got %d, expected %d", lit, tokenclass(tok), e.class)
+                               t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class)
                        }
                        epos.Offset += len(lit) + len(whitespace);
                        epos.Line += NewlineCount(lit) + whitespace_linecount;
@@ -249,6 +249,160 @@ func TestScan(t *testing.T) {
 }
 
 
+func getTok(_ token.Position, tok token.Token, _ []byte) token.Token {
+       return tok
+}
+
+
+func checkSemi(t *testing.T, line string, mode uint) {
+       var S Scanner;
+       S.Init("TestSemis", strings.Bytes(line), nil, mode);
+       pos, tok, lit := S.Scan();
+       for tok != token.EOF {
+               if tok == token.ILLEGAL {
+                       // next token must be a semicolon
+                       offs := pos.Offset + 1;
+                       pos, tok, lit = S.Scan();
+                       if tok == token.SEMICOLON {
+                               if pos.Offset != offs {
+                                       t.Errorf("bad offset for %q: got %d, expected %d", line, pos.Offset, offs)
+                               }
+                               if string(lit) != ";" {
+                                       t.Errorf(`bad literal for %q: got %q, expected ";"`, line, lit)
+                               }
+                       } else {
+                               t.Errorf("bad token for %q: got %s, expected ;", line, tok.String())
+                       }
+               } else if tok == token.SEMICOLON {
+                       t.Errorf("bad token for %q: got ;, expected no ;", line)
+               }
+               pos, tok, lit = S.Scan();
+       }
+}
+
+
+var lines = []string{
+       // the $ character indicates where a semicolon is expected
+       "",
+       "foo$\n",
+       "123$\n",
+       "1.2$\n",
+       "'x'$\n",
+       `"x"` + "$\n",
+       "`x`$\n",
+
+       "+\n",
+       "-\n",
+       "*\n",
+       "/\n",
+       "%\n",
+
+       "&\n",
+       "|\n",
+       "^\n",
+       "<<\n",
+       ">>\n",
+       "&^\n",
+
+       "+=\n",
+       "-=\n",
+       "*=\n",
+       "/=\n",
+       "%=\n",
+
+       "&=\n",
+       "|=\n",
+       "^=\n",
+       "<<=\n",
+       ">>=\n",
+       "&^=\n",
+
+       "&&\n",
+       "||\n",
+       "<-\n",
+       "++$\n",
+       "--$\n",
+
+       "==\n",
+       "<\n",
+       ">\n",
+       "=\n",
+       "!\n",
+
+       "!=\n",
+       "<=\n",
+       ">=\n",
+       ":=\n",
+       "...\n",
+
+       "(\n",
+       "[\n",
+       "{\n",
+       ",\n",
+       ".\n",
+
+       ")$\n",
+       "]$\n",
+       "}$\n",
+       "$;\n",
+       ":\n",
+
+       "break$\n",
+       "case\n",
+       "chan\n",
+       "const\n",
+       "continue$\n",
+
+       "default\n",
+       "defer\n",
+       "else\n",
+       "fallthrough$\n",
+       "for\n",
+
+       "func\n",
+       "go\n",
+       "goto\n",
+       "if\n",
+       "import\n",
+
+       "interface\n",
+       "map\n",
+       "package\n",
+       "range\n",
+       "return$\n",
+
+       "select\n",
+       "struct\n",
+       "switch\n",
+       "type\n",
+       "var\n",
+
+       "foo$//comment\n",
+       "foo$/*comment*/\n",
+       "foo$/*\n*/",
+       "foo    $// comment\n",
+       "foo    $/*comment*/\n",
+       "foo    $/*\n*/",
+
+       // TODO(gri): These need to insert the semicolon *before* the
+       //            first comment which requires arbitrary far look-
+       //            ahead. Only relevant for gofmt placement of
+       //            comments.
+       "foo    /*comment*/    $\n",
+       "foo    /*0*/ /*1*/ $/*2*/\n",
+}
+
+
+func TestSemis(t *testing.T) {
+       for _, line := range lines {
+               checkSemi(t, line, AllowIllegalChars|InsertSemis)
+       }
+       for _, line := range lines {
+               checkSemi(t, line, AllowIllegalChars|InsertSemis|ScanComments)
+       }
+}
+
+
 type seg struct {
        srcline         string; // a line of source text
        filename        string; // filename for current token