}
// findLineEnd (named findNewline before this change) reads ahead over a run
// of comments and reports whether a line end is reached before any
// non-comment token. A newline always counts as a line end; with this change
// EOF counts as well. Before returning, the scanner is put back at the
// position pos supplied by the caller.
-func (S *Scanner) findNewline(pos token.Position) bool {
+func (S *Scanner) findLineEnd(pos token.Position) bool {
// first '/' already consumed; assume S.ch == '/' || S.ch == '*'
- // read ahead until a newline or non-comment token is found
- newline := false
+ // read ahead until a newline, EOF, or non-comment token is found
+ lineend := false
// pos1 is the start position handed to scanComment; the loop only runs
// while S.ch is non-negative (a negative S.ch is treated as EOF below)
for pos1 := pos; S.ch >= 0; {
if S.ch == '/' {
//-style comment always contains a newline
- newline = true
+ lineend = true
break
}
S.scanComment(pos1)
// the scanner being on a later line than the comment start means the
// comment itself spanned a line break
if pos1.Line < S.pos.Line {
/*-style comment contained a newline */
- newline = true
+ lineend = true
break
}
S.skipWhitespace() // S.insertSemi is set
- if S.ch == '\n' {
- newline = true
+ if S.ch < 0 || S.ch == '\n' {
+ // line end
+ lineend = true
break
}
// NOTE(review): the body of this branch, and any code that advances
// pos1 for the next iteration, is not visible in this excerpt
if S.ch != '/' {
}
}
- // reset position to where it was upon calling findNewline
+ // reset position to where it was upon calling findLineEnd
// presumably pos is the position of the already-consumed first '/', so
// reading resumes one byte past it -- confirm against the caller
S.pos = pos
S.offset = pos.Offset + 1
S.next()
- return newline
+ return lineend
}
//
// If the returned token is token.SEMICOLON, the corresponding
// literal value is ";" if the semicolon was present in the source,
-// and "\n" if the semicolon was inserted because of a newline.
+// and "\n" if the semicolon was inserted because of a newline or
+// at EOF.
//
// For more tolerant parsing, Scan will return a valid token if
// possible even if a syntax error was encountered. Thus, even
S.next() // always make progress
switch ch {
case -1:
+ if S.insertSemi {
+ S.insertSemi = false // EOF consumed
+ return pos, token.SEMICOLON, newline
+ }
tok = token.EOF
case '\n':
// we only reach here if S.insertSemi was
case '/':
if S.ch == '/' || S.ch == '*' {
// comment
- if S.insertSemi && S.findNewline(pos) {
+ if S.insertSemi && S.findLineEnd(pos) {
// reset position to the beginning of the comment
S.pos = pos
S.offset = pos.Offset + 1
"foo $/*comment*/\n",
"foo $/*\n*/",
- "foo $/*comment*/\n",
"foo $/*0*/ /*1*/ /*2*/\n",
"foo $/*comment*/ \n",
"foo $/*0*/ /*1*/ /*2*/ \n",
- "foo $/**/ /*-------------*/ /*----\n*/bar $/* \n*/baa",
+ "foo $/**/ /*-------------*/ /*----\n*/bar $/* \n*/baa$\n",
"package main$\n\nfunc main() {\n\tif {\n\t\treturn /* */ }$\n}$\n",
}
// TestSemis runs checkSemi on every entry of lines, once with
// AllowIllegalChars|InsertSemis and once with ScanComments added. With this
// change it also re-runs both checks on each entry with its trailing
// newlines removed one at a time.
func TestSemis(t *testing.T) {
for _, line := range lines {
checkSemi(t, line, AllowIllegalChars|InsertSemis)
- }
- for _, line := range lines {
checkSemi(t, line, AllowIllegalChars|InsertSemis|ScanComments)
+
+ // if the input ended in newlines, the input must tokenize the
+ // same with or without those newlines
// i walks backwards over the trailing '\n' bytes; line[0:i] is the
// entry truncated just before the newline at index i
+ for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- {
+ checkSemi(t, line[0:i], AllowIllegalChars|InsertSemis)
+ checkSemi(t, line[0:i], AllowIllegalChars|InsertSemis|ScanComments)
+ }
}
}