mode uint; // scanning mode
// scanning state
- pos token.Position; // previous reading position (position before ch)
- offset int; // current reading offset (position after ch)
- ch int; // one char look-ahead
+ pos token.Position; // previous reading position (position before ch)
+ offset int; // current reading offset (position after ch)
+ ch int; // one char look-ahead
+ insertSemi bool; // insert a semicolon before next newline
+ pendingComment token.Position; // valid if pendingComment.Line > 0
// public state - ok to modify
ErrorCount int; // number of errors encountered
const (
ScanComments = 1 << iota; // return comments as COMMENT tokens
AllowIllegalChars; // do not report an error for illegal chars
+ InsertSemis; // automatically insert semicolons before a newline that follows e.g. an identifier, a literal, ), ], }, ++, --, or break/continue/fallthrough/return
)
}
+var semicolon = []byte{';'} // literal text Scan returns with an automatically inserted SEMICOLON; the same slice is returned every time
+
// Scan scans the next token and returns the token position pos,
// the token tok, and the literal text lit corresponding to the
// token. The source end is indicated by token.EOF.
// of the error handler, if there was one installed.
//
func (S *Scanner) Scan() (pos token.Position, tok token.Token, lit []byte) {
-scan_again:
+ if S.pendingComment.Line > 0 {
+ // "consume" pending comment: the previous call returned an inserted semicolon in place of this comment, so rewind to the comment's first '/' and scan it now
+ S.pos = S.pendingComment;
+ S.offset = S.pos.Offset + 1; // continue reading right after that '/'
+ S.ch = '/';
+ S.pendingComment.Line = 0; // mark as consumed (pendingComment is valid only if Line > 0)
+ }
+
+scanAgain:
// skip white space
- for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' || S.ch == '\r' {
+ for S.ch == ' ' || S.ch == '\t' || S.ch == '\n' && !S.insertSemi || S.ch == '\r' { // a newline is not skipped while a semicolon insertion is pending
S.next()
}
// current token start
+ insertSemi := false; // value S.insertSemi takes after this token; stored at the end only if InsertSemis is set
pos, tok = S.pos, token.ILLEGAL;
// determine token value
switch ch := S.ch; {
case isLetter(ch):
- tok = S.scanIdentifier()
+ tok = S.scanIdentifier();
+ switch tok {
+ case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
+ insertSemi = true
+ default:
+ insertSemi = false
+ }
case digitVal(ch) < 10:
- tok = S.scanNumber(false)
+ insertSemi = true;
+ tok = S.scanNumber(false);
default:
S.next(); // always make progress
switch ch {
case -1:
tok = token.EOF
+ case '\n': // only reached while S.insertSemi is set; otherwise the newline was skipped as white space above
+ S.insertSemi = false;
+ return pos, token.SEMICOLON, semicolon;
case '"':
+ insertSemi = true;
tok = token.STRING;
S.scanString(pos);
case '\'':
+ insertSemi = true;
tok = token.CHAR;
S.scanChar(pos);
case '`':
+ insertSemi = true;
tok = token.STRING;
S.scanRawString(pos);
case ':':
tok = S.switch2(token.COLON, token.DEFINE)
case '.':
if digitVal(S.ch) < 10 {
- tok = S.scanNumber(true)
+ insertSemi = true;
+ tok = S.scanNumber(true);
} else if S.ch == '.' {
S.next();
if S.ch == '.' {
case '(':
tok = token.LPAREN
case ')':
- tok = token.RPAREN
+ insertSemi = true;
+ tok = token.RPAREN;
case '[':
tok = token.LBRACK
case ']':
- tok = token.RBRACK
+ insertSemi = true;
+ tok = token.RBRACK;
case '{':
tok = token.LBRACE
case '}':
- tok = token.RBRACE
+ insertSemi = true;
+ tok = token.RBRACE;
case '+':
- tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC)
+ tok = S.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC);
+ if tok == token.INC {
+ insertSemi = true
+ }
case '-':
- tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC)
+ tok = S.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC);
+ if tok == token.DEC {
+ insertSemi = true
+ }
case '*':
tok = S.switch2(token.MUL, token.MUL_ASSIGN)
case '/':
if S.ch == '/' || S.ch == '*' {
- S.scanComment(pos);
- tok = token.COMMENT;
- if S.mode&ScanComments == 0 {
- goto scan_again
+ // comment: decide first whether it must be preceded by an inserted semicolon
+ newline := false;
+ if S.insertSemi {
+ if S.ch == '/' {
+ // a line comment acts like a newline
+ newline = true // the comment is not scanned here; it is rescanned on the next call via pendingComment
+ } else {
+ // a general comment may act like a newline
+ S.scanComment(pos);
+ newline = pos.Line < S.pos.Line; // true if the scanner ended up on a later line than the comment start
+ }
+ } else {
+ S.scanComment(pos)
+ }
+ if newline {
+ // insert a semicolon and retain pending comment
+ S.insertSemi = false;
+ S.pendingComment = pos; // the next Scan call restarts at the comment's first '/'
+ return pos, token.SEMICOLON, semicolon;
+ } else if S.mode&ScanComments == 0 {
+ // skip comment
+ goto scanAgain
+ } else {
+ insertSemi = S.insertSemi; // preserve insertSemi info
+ tok = token.COMMENT;
}
} else {
tok = S.switch2(token.QUO, token.QUO_ASSIGN)
if S.mode&AllowIllegalChars == 0 {
S.error(pos, "illegal character "+charString(ch))
}
+ insertSemi = S.insertSemi; // preserve insertSemi info
}
}
+ if S.mode&InsertSemis != 0 {
+ S.insertSemi = insertSemi
+ }
return pos, tok, S.src[pos.Offset:S.pos.Offset];
}
}
checkPos(t, lit, pos, epos);
if tok != e.tok {
- t.Errorf("bad token for %s: got %s, expected %s", lit, tok.String(), e.tok.String())
+ t.Errorf("bad token for %q: got %s, expected %s", lit, tok.String(), e.tok.String())
}
if e.tok.IsLiteral() && lit != e.lit {
- t.Errorf("bad literal for %s: got %s, expected %s", lit, lit, e.lit)
+ t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, e.lit)
}
if tokenclass(tok) != e.class {
- t.Errorf("bad class for %s: got %d, expected %d", lit, tokenclass(tok), e.class)
+ t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class)
}
epos.Offset += len(lit) + len(whitespace);
epos.Line += NewlineCount(lit) + whitespace_linecount;
}
+func getTok(_ token.Position, tok token.Token, _ []byte) token.Token { // getTok returns only the token of a (position, token, literal) triple
+ return tok // position and literal are discarded; NOTE(review): the parameter list mirrors Scan's results, presumably so a Scan call can be passed directly - no use is visible in this excerpt
+}
+
+ // checkSemi scans line with the given mode and verifies that a SEMICOLON token directly follows every ILLEGAL token (the marker) and occurs nowhere else.
+func checkSemi(t *testing.T, line string, mode uint) {
+ var S Scanner;
+ S.Init("TestSemis", strings.Bytes(line), nil, mode);
+ pos, tok, lit := S.Scan();
+ for tok != token.EOF {
+ if tok == token.ILLEGAL {
+ // next token must be a semicolon
+ offs := pos.Offset + 1; // the semicolon is expected at the offset right after the one-byte marker
+ pos, tok, lit = S.Scan();
+ if tok == token.SEMICOLON {
+ if pos.Offset != offs {
+ t.Errorf("bad offset for %q: got %d, expected %d", line, pos.Offset, offs)
+ }
+ if string(lit) != ";" {
+ t.Errorf(`bad literal for %q: got %q, expected ";"`, line, lit)
+ }
+ } else {
+ t.Errorf("bad token for %q: got %s, expected ;", line, tok.String()) // NOTE(review): the offending token is then skipped by the Scan below without being checked itself
+ }
+ } else if tok == token.SEMICOLON {
+ t.Errorf("bad token for %q: got ;, expected no ;", line) // a semicolon that was not announced by a marker
+ }
+ pos, tok, lit = S.Scan();
+ }
+}
+
+ // lines holds the inputs that TestSemis feeds to checkSemi: source fragments covering literals, operators, delimiters, keywords, and comments.
+var lines = []string{
+ // the $ character indicates where a semicolon is expected; checkSemi recognizes it as the token.ILLEGAL token that must be followed by a SEMICOLON
+ "",
+ "foo$\n",
+ "123$\n",
+ "1.2$\n",
+ "'x'$\n",
+ `"x"` + "$\n",
+ "`x`$\n",
+
+ "+\n",
+ "-\n",
+ "*\n",
+ "/\n",
+ "%\n",
+
+ "&\n",
+ "|\n",
+ "^\n",
+ "<<\n",
+ ">>\n",
+ "&^\n",
+
+ "+=\n",
+ "-=\n",
+ "*=\n",
+ "/=\n",
+ "%=\n",
+
+ "&=\n",
+ "|=\n",
+ "^=\n",
+ "<<=\n",
+ ">>=\n",
+ "&^=\n",
+
+ "&&\n",
+ "||\n",
+ "<-\n",
+ "++$\n",
+ "--$\n",
+
+ "==\n",
+ "<\n",
+ ">\n",
+ "=\n",
+ "!\n",
+
+ "!=\n",
+ "<=\n",
+ ">=\n",
+ ":=\n",
+ "...\n",
+
+ "(\n",
+ "[\n",
+ "{\n",
+ ",\n",
+ ".\n",
+
+ ")$\n",
+ "]$\n",
+ "}$\n",
+ "$;\n", // here the marker is followed by an explicit semicolon in the source
+ ":\n",
+
+ "break$\n",
+ "case\n",
+ "chan\n",
+ "const\n",
+ "continue$\n",
+
+ "default\n",
+ "defer\n",
+ "else\n",
+ "fallthrough$\n",
+ "for\n",
+
+ "func\n",
+ "go\n",
+ "goto\n",
+ "if\n",
+ "import\n",
+
+ "interface\n",
+ "map\n",
+ "package\n",
+ "range\n",
+ "return$\n",
+
+ "select\n",
+ "struct\n",
+ "switch\n",
+ "type\n",
+ "var\n",
+
+ "foo$//comment\n",
+ "foo$/*comment*/\n",
+ "foo$/*\n*/",
+ "foo $// comment\n",
+ "foo $/*comment*/\n",
+ "foo $/*\n*/",
+
+ // TODO(gri): These need to insert the semicolon *before* the
+ // first comment which requires arbitrary far look-
+ // ahead. Only relevant for gofmt placement of
+ // comments.
+ "foo /*comment*/ $\n",
+ "foo /*0*/ /*1*/ $/*2*/\n",
+}
+
+ // TestSemis runs checkSemi over every entry of lines twice: with InsertSemis alone and with ScanComments added (AllowIllegalChars is set in both passes).
+func TestSemis(t *testing.T) {
+ for _, line := range lines {
+ checkSemi(t, line, AllowIllegalChars|InsertSemis) // comments are skipped by the scanner
+ }
+ for _, line := range lines {
+ checkSemi(t, line, AllowIllegalChars|InsertSemis|ScanComments) // comments are returned as COMMENT tokens
+ }
+}
+
+
type seg struct {
srcline string; // a line of source text
filename string; // filename for current token