fix TODO: insert semicolons before any sequence of comments

author Robert Griesemer <gri@golang.org>

Tue, 15 Dec 2009 16:41:50 +0000 (08:41 -0800)

committer Robert Griesemer <gri@golang.org>

Tue, 15 Dec 2009 16:41:50 +0000 (08:41 -0800)
author Robert Griesemer <gri@golang.org>
Tue, 15 Dec 2009 16:41:50 +0000 (08:41 -0800)
committer Robert Griesemer <gri@golang.org>
Tue, 15 Dec 2009 16:41:50 +0000 (08:41 -0800)
diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go

index 386cdb0e9f0f8ade8711874916b8a254a1d66082..026ae99760e63c9182edded28f2ec7d20841fef3 100644 (file)
--- a/src/pkg/go/scanner/scanner.go
+++ b/src/pkg/go/scanner/scanner.go
@@ -33,7 +33,6 @@ type Scanner struct {
         offset          int;            // current reading offset (position after ch)
         ch              int;            // one char look-ahead
         insertSemi      bool;           // insert a semicolon before next newline
-       pendingComment  token.Position; // valid if pendingComment.Line > 0
  
         // public state - ok to modify
         ErrorCount      int;    // number of errors encountered
@@ -151,7 +150,7 @@ func (S *Scanner) scanComment(pos token.Position) {
                 for S.ch >= 0 {
                         S.next();
                         if S.ch == '\n' {
-                               // '\n' is not part of the comment
+                               // '\n' is not part of the comment for purposes of scanning
                                 // (the comment ends on the same line where it started)
                                 if pos.Column == 1 {
                                         text := S.src[pos.Offset+2 : S.pos.Offset];
@@ -190,6 +189,49 @@ func (S *Scanner) scanComment(pos token.Position) {
  }
  
  
+func (S *Scanner) findNewline(pos token.Position) bool {
+       // first '/' already consumed; assume S.ch == '/' || S.ch == '*'
+
+       // read ahead until a newline or non-comment token is found
+       newline := false;
+       for pos1 := pos; S.ch >= 0; {
+               if S.ch == '/' {
+                       //-style comment always contains a newline
+                       newline = true;
+                       break;
+               }
+               S.scanComment(pos1);
+               if pos1.Line < S.pos.Line {
+                       /*-style comment contained a newline */
+                       newline = true;
+                       break;
+               }
+               S.skipWhitespace();
+               if S.ch == '\n' {
+                       newline = true;
+                       break;
+               }
+               if S.ch != '/' {
+                       // non-comment token
+                       break
+               }
+               pos1 = S.pos;
+               S.next();
+               if S.ch != '/' && S.ch != '*' {
+                       // non-comment token
+                       break
+               }
+       }
+
+       // reset position
+       S.pos = pos;
+       S.offset = pos.Offset + 1;
+       S.ch = '/';
+
+       return newline;
+}
+
+
  func isLetter(ch int) bool {
         return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
  }
@@ -378,6 +420,13 @@ func (S *Scanner) scanRawString(pos token.Position) {
  }
  
  
+func (S *Scanner) skipWhitespace() {
+       for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' && !S.insertSemi || S.ch == '\r' {
+               S.next()
+       }
+}
+
+
  // Helper functions for scanning multi-byte tokens such as >> += >>= .
  // Different routines recognize different length tok_i based on matches
  // of ch_i. If a token ends in '=', the result is tok1 or tok3
@@ -437,19 +486,8 @@ var semicolon = []byte{';'}
  // of the error handler, if there was one installed.
  //
  func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) {
-       if S.pendingComment.Line > 0 {
-               // "consume" pending comment
-               S.pos = S.pendingComment;
-               S.offset = S.pos.Offset + 1;
-               S.ch = '/';
-               S.pendingComment.Line = 0;
-       }
-
  scanAgain:
-       // skip white space
-       for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' && !S.insertSemi || S.ch == '\r' {
-               S.next()
-       }
+       S.skipWhitespace();
  
         // current token start
         insertSemi := false;
@@ -462,8 +500,6 @@ scanAgain:
                 switch tok {
                 case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
                         insertSemi = true
-               default:
-                       insertSemi = false
                 }
         case digitVal(ch) < 10:
                 insertSemi = true;
@@ -474,7 +510,10 @@ scanAgain:
                 case -1:
                         tok = token.EOF
                 case '\n':
-                       S.insertSemi = false;
+                       // we only reach here of S.insertSemi was
+                       // set in the first place and exited early
+                       // from S.skipWhitespace()
+                       S.insertSemi = false;   // newline consumed
                         return pos, token.SEMICOLON, semicolon;
                 case '"':
                         insertSemi = true;
@@ -537,31 +576,17 @@ scanAgain:
                 case '/':
                         if S.ch == '/' || S.ch == '*' {
                                 // comment
-                               newline := false;
-                               if S.insertSemi {
-                                       if S.ch == '/' {
-                                               // a line comment acts like a newline
-                                               newline = true
-                                       } else {
-                                               // a general comment may act like a newline
-                                               S.scanComment(pos);
-                                               newline = pos.Line < S.pos.Line;
-                                       }
-                               } else {
-                                       S.scanComment(pos)
-                               }
-                               if newline {
-                                       // insert a semicolon and retain pending comment
-                                       S.insertSemi = false;
-                                       S.pendingComment = pos;
+                               if S.insertSemi && S.findNewline(pos) {
+                                       S.insertSemi = false;   // newline consumed
                                         return pos, token.SEMICOLON, semicolon;
-                               } else if S.mode&ScanComments == 0 {
+                               }
+                               S.scanComment(pos);
+                               if S.mode&ScanComments == 0 {
                                         // skip comment
-                                       goto scanAgain
-                               } else {
-                                       insertSemi = S.insertSemi;      // preserve insertSemi info
-                                       tok = token.COMMENT;
+                                       S.insertSemi = false;   // newline consumed
+                                       goto scanAgain;
                                 }
+                               tok = token.COMMENT;
                         } else {
                                 tok = S.switch2(token.QUO, token.QUO_ASSIGN)
                         }
diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go

index ddaaab27fd2d7d593a8a2c94a077c7d5ae3efcd7..b6d7e99ca486383ad6d9dbfa74e33bbef4a7ba48 100644 (file)
--- a/src/pkg/go/scanner/scanner_test.go
+++ b/src/pkg/go/scanner/scanner_test.go
@@ -284,6 +284,7 @@ func checkSemi(t *testing.T, line string, mode uint) {
  var lines = []string{
         // the $ character indicates where a semicolon is expected
         "",
+       "$;",
         "foo$\n",
         "123$\n",
         "1.2$\n",
@@ -380,16 +381,17 @@ var lines = []string{
         "foo$//comment\n",
         "foo$/*comment*/\n",
         "foo$/*\n*/",
+       "foo$/*comment*/    \n",
+       "foo$/*\n*/    ",
         "foo    $// comment\n",
         "foo    $/*comment*/\n",
         "foo    $/*\n*/",
  
-       // TODO(gri): These need to insert the semicolon *before* the
-       //            first comment which requires arbitrary far look-
-       //            ahead. Only relevant for gofmt placement of
-       //            comments.
-       "foo    /*comment*/    $\n",
-       "foo    /*0*/ /*1*/ $/*2*/\n",
+       "foo    $/*comment*/\n",
+       "foo    $/*0*/ /*1*/ /*2*/\n",
+       "foo    $/*comment*/    \n",
+       "foo    $/*0*/ /*1*/ /*2*/    \n",
+       "foo    $/**/ /*-------------*/       /*----\n*/bar       $/*  \n*/baa",
  }
author	Robert Griesemer <gri@golang.org>
	Tue, 15 Dec 2009 16:41:50 +0000 (08:41 -0800)
committer	Robert Griesemer <gri@golang.org>
	Tue, 15 Dec 2009 16:41:50 +0000 (08:41 -0800)
src/pkg/go/scanner/scanner.go		patch \| blob \| history
src/pkg/go/scanner/scanner_test.go		patch \| blob \| history