implemented InsertSemis mode for go/scanner

author Robert Griesemer <gri@golang.org>

Thu, 10 Dec 2009 23:31:02 +0000 (15:31 -0800)

committer Robert Griesemer <gri@golang.org>

Thu, 10 Dec 2009 23:31:02 +0000 (15:31 -0800)
author Robert Griesemer <gri@golang.org>
Thu, 10 Dec 2009 23:31:02 +0000 (15:31 -0800)
committer Robert Griesemer <gri@golang.org>
Thu, 10 Dec 2009 23:31:02 +0000 (15:31 -0800)
diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go

index 177fe0f19a3ffe9b8b8472b44ec12ab9e9ae0567..386cdb0e9f0f8ade8711874916b8a254a1d66082 100644 (file)
--- a/src/pkg/go/scanner/scanner.go
+++ b/src/pkg/go/scanner/scanner.go
@@ -29,9 +29,11 @@ type Scanner struct {
         mode    uint;           // scanning mode
  
         // scanning state
-       pos     token.Position; // previous reading position (position before ch)
-       offset  int;            // current reading offset (position after ch)
-       ch      int;            // one char look-ahead
+       pos             token.Position; // previous reading position (position before ch)
+       offset          int;            // current reading offset (position after ch)
+       ch              int;            // one char look-ahead
+       insertSemi      bool;           // insert a semicolon before next newline
+       pendingComment  token.Position; // valid if pendingComment.Line > 0
  
         // public state - ok to modify
         ErrorCount      int;    // number of errors encountered
@@ -69,6 +71,7 @@ func (S *Scanner) next() {
  const (
         ScanComments            = 1 << iota;    // return comments as COMMENT tokens
         AllowIllegalChars;      // do not report an error for illegal chars
+       InsertSemis;            // automatically insert semicolons
  )
  
  
@@ -420,6 +423,8 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke
  }
  
  
+var semicolon = []byte{';'}
+
  // Scan scans the next token and returns the token position pos,
  // the token tok, and the literal text lit corresponding to the
  // token. The source end is indicated by token.EOF.
@@ -432,40 +437,63 @@ func (S *Scanner) switch4(tok0, tok1 token.Token, ch2 int, tok2, tok3 token.Toke
  // of the error handler, if there was one installed.
  //
  func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) {
-scan_again:
+       if S.pendingComment.Line > 0 {
+               // "consume" pending comment
+               S.pos = S.pendingComment;
+               S.offset = S.pos.Offset + 1;
+               S.ch = '/';
+               S.pendingComment.Line = 0;
+       }
+
+scanAgain:
         // skip white space
-       for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' || S.ch == '\r' {
+       for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' && !S.insertSemi || S.ch == '\r' {
                 S.next()
         }
  
         // current token start
+       insertSemi := false;
         pos, tok = S.pos, token.ILLEGAL;
  
         // determine token value
         switch ch := S.ch; {
         case isLetter(ch):
-               tok = S.scanIdentifier()
+               tok = S.scanIdentifier();
+               switch tok {
+               case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
+                       insertSemi = true
+               default:
+                       insertSemi = false
+               }
         case digitVal(ch) < 10:
-               tok = S.scanNumber(false)
+               insertSemi = true;
+               tok = S.scanNumber(false);
         default:
                 S.next();       // always make progress
                 switch ch {
                 case -1:
                         tok = token.EOF
+               case '\n':
+                       S.insertSemi = false;
+                       return pos, token.SEMICOLON, semicolon;
                 case '"':
+                       insertSemi = true;
                         tok = token.STRING;
                         S.scanString(pos);
                 case '\'':
+                       insertSemi = true;
                         tok = token.CHAR;
                         S.scanChar(pos);
                 case '`':
+                       insertSemi = true;
                         tok = token.STRING;
                         S.scanRawString(pos);
                 case ':':
                         tok = S.switch2(token.COLON, token.DEFINE)
                 case '.':
                         if digitVal(S.ch) < 10 {
-                               tok = S.scanNumber(true)
+                               insertSemi = true;
+                               tok = S.scanNumber(true);
                         } else if S.ch == '.' {
                                 S.next();
                                 if S.ch == '.' {
@@ -482,27 +510,57 @@ scan_again:
                 case '(':
                         tok = token.LPAREN
                 case ')':
-                       tok = token.RPAREN
+                       insertSemi = true;
+                       tok = token.RPAREN;
                 case '[':
                         tok = token.LBRACK
                 case ']':
-                       tok = token.RBRACK
+                       insertSemi = true;
+                       tok = token.RBRACK;
                 case '{':
                         tok = token.LBRACE
                 case '}':
-                       tok = token.RBRACE
+                       insertSemi = true;
+                       tok = token.RBRACE;
                 case '+':
-                       tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC)
+                       tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC);
+                       if tok == token.INC {
+                               insertSemi = true
+                       }
                 case '-':
-                       tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC)
+                       tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC);
+                       if tok == token.DEC {
+                               insertSemi = true
+                       }
                 case '*':
                         tok = S.switch2(token.MUL, token.MUL_ASSIGN)
                 case '/':
                         if S.ch == '/' || S.ch == '*' {
-                               S.scanComment(pos);
-                               tok = token.COMMENT;
-                               if S.mode&ScanComments == 0 {
-                                       goto scan_again
+                               // comment
+                               newline := false;
+                               if S.insertSemi {
+                                       if S.ch == '/' {
+                                               // a line comment acts like a newline
+                                               newline = true
+                                       } else {
+                                               // a general comment may act like a newline
+                                               S.scanComment(pos);
+                                               newline = pos.Line < S.pos.Line;
+                                       }
+                               } else {
+                                       S.scanComment(pos)
+                               }
+                               if newline {
+                                       // insert a semicolon and retain pending comment
+                                       S.insertSemi = false;
+                                       S.pendingComment = pos;
+                                       return pos, token.SEMICOLON, semicolon;
+                               } else if S.mode&ScanComments == 0 {
+                                       // skip comment
+                                       goto scanAgain
+                               } else {
+                                       insertSemi = S.insertSemi;      // preserve insertSemi info
+                                       tok = token.COMMENT;
                                 }
                         } else {
                                 tok = S.switch2(token.QUO, token.QUO_ASSIGN)
@@ -537,9 +595,13 @@ scan_again:
                         if S.mode&AllowIllegalChars == 0 {
                                 S.error(pos, "illegal character "+charString(ch))
                         }
+                       insertSemi = S.insertSemi;      // preserve insertSemi info
                 }
         }
  
+       if S.mode&InsertSemis != 0 {
+               S.insertSemi = insertSemi
+       }
         return pos, tok, S.src[pos.Offset:S.pos.Offset];
  }
  
diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go

index c133289268a749f761806a20734c77bcca7e8cb2..ddaaab27fd2d7d593a8a2c94a077c7d5ae3efcd7 100644 (file)
--- a/src/pkg/go/scanner/scanner_test.go
+++ b/src/pkg/go/scanner/scanner_test.go
@@ -225,13 +225,13 @@ func TestScan(t *testing.T) {
                         }
                         checkPos(t, lit, pos, epos);
                         if tok != e.tok {
-                               t.Errorf("bad token for %s: got %s, expected %s", lit, tok.String(), e.tok.String())
+                               t.Errorf("bad token for %q: got %s, expected %s", lit, tok.String(), e.tok.String())
                         }
                         if e.tok.IsLiteral() && lit != e.lit {
-                               t.Errorf("bad literal for %s: got %s, expected %s", lit, lit, e.lit)
+                               t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, e.lit)
                         }
                         if tokenclass(tok) != e.class {
-                               t.Errorf("bad class for %s: got %d, expected %d", lit, tokenclass(tok), e.class)
+                               t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class)
                         }
                         epos.Offset += len(lit) + len(whitespace);
                         epos.Line += NewlineCount(lit) + whitespace_linecount;
@@ -249,6 +249,160 @@ func TestScan(t *testing.T) {
  }
  
  
+func getTok(_ token.Position, tok token.Token, _ []byte) token.Token {
+       return tok
+}
+
+
+func checkSemi(t *testing.T, line string, mode uint) {
+       var S Scanner;
+       S.Init("TestSemis", strings.Bytes(line), nil, mode);
+       pos, tok, lit := S.Scan();
+       for tok != token.EOF {
+               if tok == token.ILLEGAL {
+                       // next token must be a semicolon
+                       offs := pos.Offset + 1;
+                       pos, tok, lit = S.Scan();
+                       if tok == token.SEMICOLON {
+                               if pos.Offset != offs {
+                                       t.Errorf("bad offset for %q: got %d, expected %d", line, pos.Offset, offs)
+                               }
+                               if string(lit) != ";" {
+                                       t.Errorf(`bad literal for %q: got %q, expected ";"`, line, lit)
+                               }
+                       } else {
+                               t.Errorf("bad token for %q: got %s, expected ;", line, tok.String())
+                       }
+               } else if tok == token.SEMICOLON {
+                       t.Errorf("bad token for %q: got ;, expected no ;", line)
+               }
+               pos, tok, lit = S.Scan();
+       }
+}
+
+
+var lines = []string{
+       // the $ character indicates where a semicolon is expected
+       "",
+       "foo$\n",
+       "123$\n",
+       "1.2$\n",
+       "'x'$\n",
+       `"x"` + "$\n",
+       "`x`$\n",
+
+       "+\n",
+       "-\n",
+       "*\n",
+       "/\n",
+       "%\n",
+
+       "&\n",
+       "|\n",
+       "^\n",
+       "<<\n",
+       ">>\n",
+       "&^\n",
+
+       "+=\n",
+       "-=\n",
+       "*=\n",
+       "/=\n",
+       "%=\n",
+
+       "&=\n",
+       "|=\n",
+       "^=\n",
+       "<<=\n",
+       ">>=\n",
+       "&^=\n",
+
+       "&&\n",
+       "||\n",
+       "<-\n",
+       "++$\n",
+       "--$\n",
+
+       "==\n",
+       "<\n",
+       ">\n",
+       "=\n",
+       "!\n",
+
+       "!=\n",
+       "<=\n",
+       ">=\n",
+       ":=\n",
+       "...\n",
+
+       "(\n",
+       "[\n",
+       "{\n",
+       ",\n",
+       ".\n",
+
+       ")$\n",
+       "]$\n",
+       "}$\n",
+       "$;\n",
+       ":\n",
+
+       "break$\n",
+       "case\n",
+       "chan\n",
+       "const\n",
+       "continue$\n",
+
+       "default\n",
+       "defer\n",
+       "else\n",
+       "fallthrough$\n",
+       "for\n",
+
+       "func\n",
+       "go\n",
+       "goto\n",
+       "if\n",
+       "import\n",
+
+       "interface\n",
+       "map\n",
+       "package\n",
+       "range\n",
+       "return$\n",
+
+       "select\n",
+       "struct\n",
+       "switch\n",
+       "type\n",
+       "var\n",
+
+       "foo$//comment\n",
+       "foo$/*comment*/\n",
+       "foo$/*\n*/",
+       "foo    $// comment\n",
+       "foo    $/*comment*/\n",
+       "foo    $/*\n*/",
+
+       // TODO(gri): These need to insert the semicolon *before* the
+       //            first comment which requires arbitrary far look-
+       //            ahead. Only relevant for gofmt placement of
+       //            comments.
+       "foo    /*comment*/    $\n",
+       "foo    /*0*/ /*1*/ $/*2*/\n",
+}
+
+
+func TestSemis(t *testing.T) {
+       for _, line := range lines {
+               checkSemi(t, line, AllowIllegalChars|InsertSemis)
+       }
+       for _, line := range lines {
+               checkSemi(t, line, AllowIllegalChars|InsertSemis|ScanComments)
+       }
+}
+
+
  type seg struct {
         srcline         string; // a line of source text
         filename        string; // filename for current token
author	Robert Griesemer <gri@golang.org>
	Thu, 10 Dec 2009 23:31:02 +0000 (15:31 -0800)
committer	Robert Griesemer <gri@golang.org>
	Thu, 10 Dec 2009 23:31:02 +0000 (15:31 -0800)
src/pkg/go/scanner/scanner.go		patch \| blob \| history
src/pkg/go/scanner/scanner_test.go		patch \| blob \| history