From e64d337726542492f5a3610229e4e112eda77389 Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Wed, 30 Mar 2011 15:26:53 -0700 Subject: [PATCH] scanner: treat line comments like in Go - don't consume '\n' as part of line comment (otherwise grammars where '\n' are tokens won't see them after a line comment) - permit line comments to end in EOF R=r CC=golang-dev https://golang.org/cl/4277089 --- src/pkg/scanner/scanner.go | 43 ++++++++------------- src/pkg/scanner/scanner_test.go | 68 ++++++++++++++++++++++----------- 2 files changed, 62 insertions(+), 49 deletions(-) diff --git a/src/pkg/scanner/scanner.go b/src/pkg/scanner/scanner.go index 2396cdd9a1..560e595b45 100644 --- a/src/pkg/scanner/scanner.go +++ b/src/pkg/scanner/scanner.go @@ -331,7 +331,7 @@ func (s *Scanner) error(msg string) { s.Error(s, msg) return } - fmt.Fprintf(os.Stderr, "%s: %s", s.Position, msg) + fmt.Fprintf(os.Stderr, "%s: %s\n", s.Position, msg) } @@ -503,41 +503,32 @@ func (s *Scanner) scanChar() { } -func (s *Scanner) scanLineComment() { - ch := s.next() // read character after "//" - for ch != '\n' { - if ch < 0 { - s.error("comment not terminated") - return +func (s *Scanner) scanComment(ch int) int { + // ch == '/' || ch == '*' + if ch == '/' { + // line comment + ch = s.next() // read character after "//" + for ch != '\n' && ch >= 0 { + ch = s.next() } - ch = s.next() + return ch } -} - -func (s *Scanner) scanGeneralComment() { - ch := s.next() // read character after "/*" + // general comment + ch = s.next() // read character after "/*" for { if ch < 0 { s.error("comment not terminated") - return + break } ch0 := ch ch = s.next() if ch0 == '*' && ch == '/' { + ch = s.next() break } } -} - - -func (s *Scanner) scanComment(ch int) { - // ch == '/' || ch == '*' - if ch == '/' { - s.scanLineComment() - return - } - s.scanGeneralComment() + return ch } @@ -619,13 +610,11 @@ redo: if (ch == '/' || ch == '*') && s.Mode&ScanComments != 0 { if s.Mode&SkipComments != 0 { s.tokPos = -1 // don't collect token text - s.scanComment(ch) - ch = s.next() + ch = s.scanComment(ch) goto redo } - s.scanComment(ch) + ch = s.scanComment(ch) tok = Comment - ch = s.next() } case '`': if s.Mode&ScanRawStrings != 0 { diff --git a/src/pkg/scanner/scanner_test.go b/src/pkg/scanner/scanner_test.go index 002252de8a..cf9ad01111 100644 --- a/src/pkg/scanner/scanner_test.go +++ b/src/pkg/scanner/scanner_test.go @@ -77,15 +77,15 @@ type token struct { var f100 = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" var tokenList = []token{ - {Comment, "// line comments\n"}, - {Comment, "//\n"}, - {Comment, "////\n"}, - {Comment, "// comment\n"}, - {Comment, "// /* comment */\n"}, - {Comment, "// // comment //\n"}, - {Comment, "//" + f100 + "\n"}, - - {Comment, "// general comments\n"}, + {Comment, "// line comments"}, + {Comment, "//"}, + {Comment, "////"}, + {Comment, "// comment"}, + {Comment, "// /* comment */"}, + {Comment, "// // comment //"}, + {Comment, "//" + f100}, + + {Comment, "// general comments"}, {Comment, "/**/"}, {Comment, "/***/"}, {Comment, "/* comment */"}, @@ -94,7 +94,7 @@ var tokenList = []token{ {Comment, "/*\n comment\n*/"}, {Comment, "/*" + f100 + "*/"}, - {Comment, "// identifiers\n"}, + {Comment, "// identifiers"}, {Ident, "a"}, {Ident, "a0"}, {Ident, "foobar"}, @@ -116,21 +116,21 @@ var tokenList = []token{ {Ident, "bar9876"}, {Ident, f100}, - {Comment, "// decimal ints\n"}, + {Comment, "// decimal ints"}, {Int, "0"}, {Int, "1"}, {Int, "9"}, {Int, "42"}, {Int, "1234567890"}, - {Comment, "// octal ints\n"}, + {Comment, "// octal ints"}, {Int, "00"}, {Int, "01"}, {Int, "07"}, {Int, "042"}, {Int, "01234567"}, - {Comment, "// hexadecimal ints\n"}, + {Comment, "// hexadecimal ints"}, {Int, "0x0"}, {Int, "0x1"}, {Int, "0xf"}, @@ -144,7 +144,7 @@ var tokenList = []token{ {Int, "0X123456789abcDEF"}, {Int, "0X" + f100}, - {Comment, "// floats\n"}, + {Comment, "// floats"}, {Float, "0."}, {Float, "1."}, {Float, "42."}, @@ -174,7 +174,7 @@ var tokenList = []token{ {Float, "42E+10"}, {Float, "01234567890E-10"}, - {Comment, "// chars\n"}, + {Comment, "// chars"}, {Char, `' '`}, {Char, `'a'`}, {Char, `'本'`}, @@ -195,7 +195,7 @@ var tokenList = []token{ {Char, `'\U00000000'`}, {Char, `'\U0000ffAB'`}, - {Comment, "// strings\n"}, + {Comment, "// strings"}, {String, `" "`}, {String, `"a"`}, {String, `"本"`}, @@ -217,13 +217,13 @@ var tokenList = []token{ {String, `"\U0000ffAB"`}, {String, `"` + f100 + `"`}, - {Comment, "// raw strings\n"}, + {Comment, "// raw strings"}, {String, "``"}, {String, "`\\`"}, {String, "`" + "\n\n/* foobar */\n\n" + "`"}, {String, "`" + f100 + "`"}, - {Comment, "// individual characters\n"}, + {Comment, "// individual characters"}, // NUL character is not allowed {'\x01', "\x01"}, {' ' - 1, string(' ' - 1)}, @@ -276,7 +276,7 @@ func countNewlines(s string) int { func testScan(t *testing.T, mode uint) { - s := new(Scanner).Init(makeSource(" \t%s\t\n\r")) + s := new(Scanner).Init(makeSource(" \t%s\n")) s.Mode = mode tok := s.Scan() line := 1 @@ -287,7 +287,7 @@ func testScan(t *testing.T, mode uint) { } line += countNewlines(k.text) + 1 // each token is on a new line } - checkTok(t, s, line, tok, -1, "") + checkTok(t, s, line, tok, EOF, "") } @@ -317,6 +317,10 @@ func TestPosition(t *testing.T) { pos.Line += countNewlines(k.text) + 1 // each token is on a new line s.Scan() } + // make sure there were no token-internal errors reported by scanner + if s.ErrorCount != 0 { + t.Errorf("%d errors", s.ErrorCount) + } } @@ -336,6 +340,9 @@ func TestScanZeroMode(t *testing.T) { if tok != EOF { t.Fatalf("tok = %s, want EOF", TokenString(tok)) } + if s.ErrorCount != 0 { + t.Errorf("%d errors", s.ErrorCount) + } } @@ -350,6 +357,9 @@ func testScanSelectedMode(t *testing.T, mode uint, class int) { } tok = s.Scan() } + if s.ErrorCount != 0 { + t.Errorf("%d errors", s.ErrorCount) + } } @@ -367,7 +377,7 @@ func TestScanSelectedMask(t *testing.T) { func TestScanNext(t *testing.T) { - s := new(Scanner).Init(bytes.NewBufferString("if a == bcd /* comment */ {\n\ta += c\n}")) + s := new(Scanner).Init(bytes.NewBufferString("if a == bcd /* comment */ {\n\ta += c\n} // line comment ending in eof")) checkTok(t, s, 1, s.Scan(), Ident, "if") checkTok(t, s, 1, s.Scan(), Ident, "a") checkTok(t, s, 1, s.Scan(), '=', "=") @@ -382,6 +392,9 @@ func TestScanNext(t *testing.T) { checkTok(t, s, 2, s.Scan(), Ident, "c") checkTok(t, s, 3, s.Scan(), '}', "}") checkTok(t, s, 3, s.Scan(), -1, "") + if s.ErrorCount != 0 { + t.Errorf("%d errors", s.ErrorCount) + } } @@ -441,7 +454,6 @@ func TestError(t *testing.T) { testError(t, `"\'"`, "illegal char escape", String) testError(t, `"abc`, "literal not terminated", String) testError(t, "`abc", "literal not terminated", String) - testError(t, `//`, "comment not terminated", EOF) testError(t, `/*/`, "comment not terminated", EOF) testError(t, `"abc`+"\x00"+`def"`, "illegal character NUL", String) testError(t, `"abc`+"\xff"+`def"`, "illegal UTF-8 encoding", String) @@ -493,6 +505,9 @@ func TestPos(t *testing.T) { for i := 10; i > 0; i-- { checkScanPos(t, s, 1, 2, 1, EOF) } + if s.ErrorCount != 0 { + t.Errorf("%d errors", s.ErrorCount) + } // corner case: source with only a single character s = new(Scanner).Init(bytes.NewBufferString("本")) @@ -502,6 +517,9 @@ func TestPos(t *testing.T) { for i := 10; i > 0; i-- { checkScanPos(t, s, 3, 1, 2, EOF) } + if s.ErrorCount != 0 { + t.Errorf("%d errors", s.ErrorCount) + } // positions after calling Next s = new(Scanner).Init(bytes.NewBufferString(" foo६४ \n\n本語\n")) @@ -524,6 +542,9 @@ func TestPos(t *testing.T) { for i := 10; i > 0; i-- { checkScanPos(t, s, 22, 4, 1, EOF) } + if s.ErrorCount != 0 { + t.Errorf("%d errors", s.ErrorCount) + } // positions after calling Scan s = new(Scanner).Init(bytes.NewBufferString("abc\n本語\n\nx")) @@ -543,4 +564,7 @@ func TestPos(t *testing.T) { for i := 10; i > 0; i-- { checkScanPos(t, s, 13, 4, 2, EOF) } + if s.ErrorCount != 0 { + t.Errorf("%d errors", s.ErrorCount) + } } -- 2.48.1