]> Cypherpunks repositories - gostls13.git/commitdiff
fix TODO: insert semicolons before any sequence of comments
authorRobert Griesemer <gri@golang.org>
Tue, 15 Dec 2009 16:41:50 +0000 (08:41 -0800)
committerRobert Griesemer <gri@golang.org>
Tue, 15 Dec 2009 16:41:50 +0000 (08:41 -0800)
that introduce the newline (important for correct placement
of comments with gofmt when parsing new syntax)

R=rsc
https://golang.org/cl/179055

src/pkg/go/scanner/scanner.go
src/pkg/go/scanner/scanner_test.go

index 386cdb0e9f0f8ade8711874916b8a254a1d66082..026ae99760e63c9182edded28f2ec7d20841fef3 100644 (file)
@@ -33,7 +33,6 @@ type Scanner struct {
        offset          int;            // current reading offset (position after ch)
        ch              int;            // one char look-ahead
        insertSemi      bool;           // insert a semicolon before next newline
-       pendingComment  token.Position; // valid if pendingComment.Line > 0
 
        // public state - ok to modify
        ErrorCount      int;    // number of errors encountered
@@ -151,7 +150,7 @@ func (S *Scanner) scanComment(pos token.Position) {
                for S.ch >= 0 {
                        S.next();
                        if S.ch == '\n' {
-                               // '\n' is not part of the comment
+                               // '\n' is not part of the comment for purposes of scanning
                                // (the comment ends on the same line where it started)
                                if pos.Column == 1 {
                                        text := S.src[pos.Offset+2 : S.pos.Offset];
@@ -190,6 +189,49 @@ func (S *Scanner) scanComment(pos token.Position) {
 }
 
 
+func (S *Scanner) findNewline(pos token.Position) bool {
+       // first '/' already consumed; assume S.ch == '/' || S.ch == '*'
+
+       // read ahead until a newline or non-comment token is found
+       newline := false;
+       for pos1 := pos; S.ch >= 0; {
+               if S.ch == '/' {
+                       //-style comment always contains a newline
+                       newline = true;
+                       break;
+               }
+               S.scanComment(pos1);
+               if pos1.Line < S.pos.Line {
+                       /*-style comment contained a newline */
+                       newline = true;
+                       break;
+               }
+               S.skipWhitespace();
+               if S.ch == '\n' {
+                       newline = true;
+                       break;
+               }
+               if S.ch != '/' {
+                       // non-comment token
+                       break
+               }
+               pos1 = S.pos;
+               S.next();
+               if S.ch != '/' && S.ch != '*' {
+                       // non-comment token
+                       break
+               }
+       }
+
+       // reset position
+       S.pos = pos;
+       S.offset = pos.Offset + 1;
+       S.ch = '/';
+
+       return newline;
+}
+
+
 func isLetter(ch int) bool {
        return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
 }
@@ -378,6 +420,13 @@ func (S *Scanner) scanRawString(pos token.Position) {
 }
 
 
+func (S *Scanner) skipWhitespace() {
+       for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' && !S.insertSemi || S.ch == '\r' {
+               S.next()
+       }
+}
+
+
 // Helper functions for scanning multi-byte tokens such as >> += >>= .
 // Different routines recognize different length tok_i based on matches
 // of ch_i. If a token ends in '=', the result is tok1 or tok3
@@ -437,19 +486,8 @@ var semicolon = []byte{';'}
 // of the error handler, if there was one installed.
 //
 func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) {
-       if S.pendingComment.Line > 0 {
-               // "consume" pending comment
-               S.pos = S.pendingComment;
-               S.offset = S.pos.Offset + 1;
-               S.ch = '/';
-               S.pendingComment.Line = 0;
-       }
-
 scanAgain:
-       // skip white space
-       for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' && !S.insertSemi || S.ch == '\r' {
-               S.next()
-       }
+       S.skipWhitespace();
 
        // current token start
        insertSemi := false;
@@ -462,8 +500,6 @@ scanAgain:
                switch tok {
                case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
                        insertSemi = true
-               default:
-                       insertSemi = false
                }
        case digitVal(ch) < 10:
                insertSemi = true;
@@ -474,7 +510,10 @@ scanAgain:
                case -1:
                        tok = token.EOF
                case '\n':
-                       S.insertSemi = false;
+                       // we only reach here of S.insertSemi was
+                       // set in the first place and exited early
+                       // from S.skipWhitespace()
+                       S.insertSemi = false;   // newline consumed
                        return pos, token.SEMICOLON, semicolon;
                case '"':
                        insertSemi = true;
@@ -537,31 +576,17 @@ scanAgain:
                case '/':
                        if S.ch == '/' || S.ch == '*' {
                                // comment
-                               newline := false;
-                               if S.insertSemi {
-                                       if S.ch == '/' {
-                                               // a line comment acts like a newline
-                                               newline = true
-                                       } else {
-                                               // a general comment may act like a newline
-                                               S.scanComment(pos);
-                                               newline = pos.Line < S.pos.Line;
-                                       }
-                               } else {
-                                       S.scanComment(pos)
-                               }
-                               if newline {
-                                       // insert a semicolon and retain pending comment
-                                       S.insertSemi = false;
-                                       S.pendingComment = pos;
+                               if S.insertSemi && S.findNewline(pos) {
+                                       S.insertSemi = false;   // newline consumed
                                        return pos, token.SEMICOLON, semicolon;
-                               } else if S.mode&ScanComments == 0 {
+                               }
+                               S.scanComment(pos);
+                               if S.mode&ScanComments == 0 {
                                        // skip comment
-                                       goto scanAgain
-                               } else {
-                                       insertSemi = S.insertSemi;      // preserve insertSemi info
-                                       tok = token.COMMENT;
+                                       S.insertSemi = false;   // newline consumed
+                                       goto scanAgain;
                                }
+                               tok = token.COMMENT;
                        } else {
                                tok = S.switch2(token.QUO, token.QUO_ASSIGN)
                        }
index ddaaab27fd2d7d593a8a2c94a077c7d5ae3efcd7..b6d7e99ca486383ad6d9dbfa74e33bbef4a7ba48 100644 (file)
@@ -284,6 +284,7 @@ func checkSemi(t *testing.T, line string, mode uint) {
 var lines = []string{
        // the $ character indicates where a semicolon is expected
        "",
+       "$;",
        "foo$\n",
        "123$\n",
        "1.2$\n",
@@ -380,16 +381,17 @@ var lines = []string{
        "foo$//comment\n",
        "foo$/*comment*/\n",
        "foo$/*\n*/",
+       "foo$/*comment*/    \n",
+       "foo$/*\n*/    ",
        "foo    $// comment\n",
        "foo    $/*comment*/\n",
        "foo    $/*\n*/",
 
-       // TODO(gri): These need to insert the semicolon *before* the
-       //            first comment which requires arbitrary far look-
-       //            ahead. Only relevant for gofmt placement of
-       //            comments.
-       "foo    /*comment*/    $\n",
-       "foo    /*0*/ /*1*/ $/*2*/\n",
+       "foo    $/*comment*/\n",
+       "foo    $/*0*/ /*1*/ /*2*/\n",
+       "foo    $/*comment*/    \n",
+       "foo    $/*0*/ /*1*/ /*2*/    \n",
+       "foo    $/**/ /*-------------*/       /*----\n*/bar       $/*  \n*/baa",
 }