// a regular expression that matches the expected error message.
// The special form /* ERROR HERE "rx" */ must be used for error
// messages that appear immediately after a token, rather than at
-// a token's position.
-var errRx = regexp.MustCompile(`^/\* *ERROR *(HERE)? *"([^"]*)" *\*/$`)
+// a token's position, and ERROR AFTER means after the comment
+// (e.g. at end of line).
+var errRx = regexp.MustCompile(`^/\* *ERROR *(HERE|AFTER)? *"([^"]*)" *\*/$`)
// expectedErrors collects the regular expressions of ERROR comments found
// in files and returns them as a map of error positions to error messages.
case token.COMMENT:
s := errRx.FindStringSubmatch(lit)
if len(s) == 3 {
- pos := prev
if s[1] == "HERE" {
- pos = here
+ pos = here // start of comment
+ } else if s[1] == "AFTER" {
+ pos += token.Pos(len(lit)) // end of comment
+ } else {
+ pos = prev // token prior to comment
}
errors[pos] = s[2]
}
// The comment is on same line as the previous token; it
// cannot be a lead comment but may be a line comment.
comment, endline = p.consumeCommentGroup(0)
- if p.file.Line(p.pos) != endline || p.tok == token.EOF {
+ if p.file.Line(p.pos) != endline || p.tok == token.SEMICOLON || p.tok == token.EOF {
// The next token is on a different line, thus
// the last comment group is a line comment.
p.lineComment = comment
return p.expect(tok)
}
-func (p *parser) expectSemi() {
+// expectSemi consumes a semicolon and returns the applicable line comment.
+func (p *parser) expectSemi() (comment *ast.CommentGroup) {
// semicolon is optional before a closing ')' or '}'
if p.tok != token.RPAREN && p.tok != token.RBRACE {
switch p.tok {
p.errorExpected(p.pos, "';'")
fallthrough
case token.SEMICOLON:
- p.next()
+ if p.lit == ";" {
+ // explicit semicolon
+ p.next()
+ comment = p.lineComment // use following comments
+ } else {
+ // artificial semicolon
+ comment = p.lineComment // use preceding comments
+ p.next()
+ }
+ return comment
default:
p.errorExpected(p.pos, "';'")
p.advance(stmtStart)
}
}
+ return nil
}
func (p *parser) atComma(context string, follow token.Token) bool {
p.next()
}
- p.expectSemi() // call before accessing p.linecomment
+ comment := p.expectSemi()
- field := &ast.Field{Doc: doc, Names: names, Type: typ, Tag: tag, Comment: p.lineComment}
+ field := &ast.Field{Doc: doc, Names: names, Type: typ, Tag: tag, Comment: comment}
return field
}
if f.Names == nil {
f.Type = p.embeddedElem(f.Type)
}
- p.expectSemi()
- f.Comment = p.lineComment
+ f.Comment = p.expectSemi()
list = append(list, f)
case p.tok == token.TILDE:
typ := p.embeddedElem(nil)
- p.expectSemi()
- comment := p.lineComment
+ comment := p.expectSemi()
list = append(list, &ast.Field{Type: typ, Comment: comment})
default:
if t := p.tryIdentOrType(); t != nil {
typ := p.embeddedElem(t)
- p.expectSemi()
- comment := p.lineComment
+ comment := p.expectSemi()
list = append(list, &ast.Field{Type: typ, Comment: comment})
} else {
break parseElements
p.error(pos, "missing import path")
p.advance(exprEnd)
}
- p.expectSemi() // call before accessing p.linecomment
+ comment := p.expectSemi()
// collect imports
spec := &ast.ImportSpec{
Doc: doc,
Name: ident,
Path: &ast.BasicLit{ValuePos: pos, Kind: token.STRING, Value: path},
- Comment: p.lineComment,
+ Comment: comment,
}
p.imports = append(p.imports, spec)
default:
panic("unreachable")
}
- p.expectSemi() // call before accessing p.linecomment
+ comment := p.expectSemi()
spec := &ast.ValueSpec{
Doc: doc,
Names: idents,
Type: typ,
Values: values,
- Comment: p.lineComment,
+ Comment: comment,
}
return spec
}
spec.Type = p.parseType()
}
- p.expectSemi() // call before accessing p.linecomment
- spec.Comment = p.lineComment
+ spec.Comment = p.expectSemi()
return spec
}
F2 int // F2 line comment
// f3 lead comment
f3 int // f3 line comment
+
+ f4 int /* not a line comment */ ;
+ f5 int ; // f5 line comment
+ f6 int ; /* f6 line comment */
+ f7 int ; /*f7a*/ /*f7b*/ //f7c
}
`, ParseComments)
if err != nil {
checkFieldComments(t, f, "T.F1", "/* F1 lead comment *///", "/* F1 */// line comment")
checkFieldComments(t, f, "T.F2", "// F2 lead// comment", "// F2 line comment")
checkFieldComments(t, f, "T.f3", "// f3 lead comment", "// f3 line comment")
+ checkFieldComments(t, f, "T.f4", "", "")
+ checkFieldComments(t, f, "T.f5", "", "// f5 line comment")
+ checkFieldComments(t, f, "T.f6", "", "/* f6 line comment */")
+ checkFieldComments(t, f, "T.f7", "", "/*f7a*//*f7b*///f7c")
+
ast.FileExports(f)
checkFieldComments(t, f, "T.F1", "/* F1 lead comment *///", "/* F1 */// line comment")
checkFieldComments(t, f, "T.F2", "// F2 lead// comment", "// F2 line comment")
package p
var _ = []int{
- 0/* ERROR HERE "missing ','" */
+ 0/* ERROR AFTER "missing ','" */
}
var _ = []int{
0,
1,
2,
- 3/* ERROR HERE "missing ','" */
+ 3/* ERROR AFTER "missing ','" */
}
// 1:16 ( ""
// 1:17 IDENT "x"
// 1:18 ) ""
- // 1:20 ; "\n"
// 1:20 COMMENT "// Euler"
+ // 1:28 ; "\n"
}
mode Mode // scanning mode
// scanning state
- ch rune // current character
- offset int // character offset
- rdOffset int // reading offset (position after current character)
- lineOffset int // current line offset
- insertSemi bool // insert a semicolon before next newline
+ ch rune // current character
+ offset int // character offset
+ rdOffset int // reading offset (position after current character)
+ lineOffset int // current line offset
+ insertSemi bool // insert a semicolon before next newline
+ nlPos token.Pos // position of newline in preceding comment
// public state - ok to modify
ErrorCount int // number of errors encountered
s.error(offs, fmt.Sprintf(format, args...))
}
-func (s *Scanner) scanComment() string {
+// scanComment returns the text of the comment and (if nonzero)
+// the offset of the first newline within it, which implies a
+// /*...*/ comment.
+func (s *Scanner) scanComment() (string, int) {
// initial '/' already consumed; s.ch == '/' || s.ch == '*'
offs := s.offset - 1 // position of initial '/'
next := -1 // position immediately following the comment; < 0 means invalid comment
numCR := 0
+ nlOffset := 0 // offset of first newline within /*...*/ comment
if s.ch == '/' {
//-style comment
ch := s.ch
if ch == '\r' {
numCR++
+ } else if ch == '\n' && nlOffset == 0 {
+ nlOffset = s.offset
}
s.next()
if ch == '*' && s.ch == '/' {
lit = stripCR(lit, lit[1] == '*')
}
- return string(lit)
+ return string(lit), nlOffset
}
var prefix = []byte("line ")
return i + 1, int(n), err == nil
}
-func (s *Scanner) findLineEnd() bool {
- // initial '/' already consumed
-
- defer func(offs int) {
- // reset scanner state to where it was upon calling findLineEnd
- s.ch = '/'
- s.offset = offs
- s.rdOffset = offs + 1
- s.next() // consume initial '/' again
- }(s.offset - 1)
-
- // read ahead until a newline, EOF, or non-comment token is found
- for s.ch == '/' || s.ch == '*' {
- if s.ch == '/' {
- //-style comment always contains a newline
- return true
- }
- /*-style comment: look for newline */
- s.next()
- for s.ch >= 0 {
- ch := s.ch
- if ch == '\n' {
- return true
- }
- s.next()
- if ch == '*' && s.ch == '/' {
- s.next()
- break
- }
- }
- s.skipWhitespace() // s.insertSemi is set
- if s.ch < 0 || s.ch == '\n' {
- return true
- }
- if s.ch != '/' {
- // non-comment token
- return false
- }
- s.next() // consume '/'
- }
-
- return false
-}
-
func isLetter(ch rune) bool {
return 'a' <= lower(ch) && lower(ch) <= 'z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
}
// and thus relative to the file set.
func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
scanAgain:
+ if s.nlPos.IsValid() {
+ // Return artificial ';' token after /*...*/ comment
+ // containing newline, at position of first newline.
+ pos, tok, lit = s.nlPos, token.SEMICOLON, "\n"
+ s.nlPos = token.NoPos
+ return
+ }
+
s.skipWhitespace()
// current token start
case '/':
if s.ch == '/' || s.ch == '*' {
// comment
- if s.insertSemi && s.findLineEnd() {
- // reset position to the beginning of the comment
- s.ch = '/'
- s.offset = s.file.Offset(pos)
- s.rdOffset = s.offset + 1
- s.insertSemi = false // newline consumed
- return pos, token.SEMICOLON, "\n"
+ comment, nlOffset := s.scanComment()
+ if s.insertSemi && nlOffset != 0 {
+ // For /*...*/ containing \n, return
+ // COMMENT then artificial SEMICOLON.
+ s.nlPos = s.file.Pos(nlOffset)
+ s.insertSemi = false
+ } else {
+ insertSemi = s.insertSemi // preserve insertSemi info
}
- comment := s.scanComment()
if s.mode&ScanComments == 0 {
// skip comment
- s.insertSemi = false // newline consumed
goto scanAgain
}
tok = token.COMMENT
lit = comment
} else {
+ // division
tok = s.switch2(token.QUO, token.QUO_ASSIGN)
}
case '%':
}
}
-func checkSemi(t *testing.T, line string, mode Mode) {
- var S Scanner
- file := fset.AddFile("TestSemis", fset.Base(), len(line))
- S.Init(file, []byte(line), nil, mode)
- pos, tok, lit := S.Scan()
- for tok != token.EOF {
- if tok == token.ILLEGAL {
- // the illegal token literal indicates what
- // kind of semicolon literal to expect
- semiLit := "\n"
- if lit[0] == '#' {
- semiLit = ";"
- }
- // next token must be a semicolon
- semiPos := file.Position(pos)
- semiPos.Offset++
- semiPos.Column++
- pos, tok, lit = S.Scan()
- if tok == token.SEMICOLON {
- if lit != semiLit {
- t.Errorf(`bad literal for %q: got %q, expected %q`, line, lit, semiLit)
- }
- checkPos(t, line, pos, semiPos)
- } else {
- t.Errorf("bad token for %q: got %s, expected ;", line, tok)
+func checkSemi(t *testing.T, input, want string, mode Mode) {
+ if mode&ScanComments == 0 {
+ want = strings.ReplaceAll(want, "COMMENT ", "")
+ want = strings.ReplaceAll(want, " COMMENT", "") // if at end
+ want = strings.ReplaceAll(want, "COMMENT", "") // if sole token
+ }
+
+ file := fset.AddFile("TestSemis", fset.Base(), len(input))
+ var scan Scanner
+ scan.Init(file, []byte(input), nil, mode)
+ var tokens []string
+ for {
+ pos, tok, lit := scan.Scan()
+ if tok == token.EOF {
+ break
+ }
+ if tok == token.SEMICOLON && lit != ";" {
+ // Artificial semicolon:
+ // assert that position is EOF or that of a newline.
+ off := file.Offset(pos)
+ if off != len(input) && input[off] != '\n' {
+ t.Errorf("scanning <<%s>>, got SEMICOLON at offset %d, want newline or EOF", input, off)
}
- } else if tok == token.SEMICOLON {
- t.Errorf("bad token for %q: got ;, expected no ;", line)
}
- pos, tok, lit = S.Scan()
+ lit = tok.String() // "\n" => ";"
+ tokens = append(tokens, lit)
+ }
+ if got := strings.Join(tokens, " "); got != want {
+ t.Errorf("scanning <<%s>>, got [%s], want [%s]", input, got, want)
}
}
-var lines = []string{
- // # indicates a semicolon present in the source
- // $ indicates an automatically inserted semicolon
- "",
- "\ufeff#;", // first BOM is ignored
- "#;",
- "foo$\n",
- "123$\n",
- "1.2$\n",
- "'x'$\n",
- `"x"` + "$\n",
- "`x`$\n",
-
- "+\n",
- "-\n",
- "*\n",
- "/\n",
- "%\n",
-
- "&\n",
- "|\n",
- "^\n",
- "<<\n",
- ">>\n",
- "&^\n",
-
- "+=\n",
- "-=\n",
- "*=\n",
- "/=\n",
- "%=\n",
-
- "&=\n",
- "|=\n",
- "^=\n",
- "<<=\n",
- ">>=\n",
- "&^=\n",
-
- "&&\n",
- "||\n",
- "<-\n",
- "++$\n",
- "--$\n",
-
- "==\n",
- "<\n",
- ">\n",
- "=\n",
- "!\n",
-
- "!=\n",
- "<=\n",
- ">=\n",
- ":=\n",
- "...\n",
-
- "(\n",
- "[\n",
- "{\n",
- ",\n",
- ".\n",
-
- ")$\n",
- "]$\n",
- "}$\n",
- "#;\n",
- ":\n",
-
- "break$\n",
- "case\n",
- "chan\n",
- "const\n",
- "continue$\n",
-
- "default\n",
- "defer\n",
- "else\n",
- "fallthrough$\n",
- "for\n",
-
- "func\n",
- "go\n",
- "goto\n",
- "if\n",
- "import\n",
-
- "interface\n",
- "map\n",
- "package\n",
- "range\n",
- "return$\n",
-
- "select\n",
- "struct\n",
- "switch\n",
- "type\n",
- "var\n",
-
- "foo$//comment\n",
- "foo$//comment",
- "foo$/*comment*/\n",
- "foo$/*\n*/",
- "foo$/*comment*/ \n",
- "foo$/*\n*/ ",
-
- "foo $// comment\n",
- "foo $// comment",
- "foo $/*comment*/\n",
- "foo $/*\n*/",
- "foo $/* */ /* \n */ bar$/**/\n",
- "foo $/*0*/ /*1*/ /*2*/\n",
-
- "foo $/*comment*/ \n",
- "foo $/*0*/ /*1*/ /*2*/ \n",
- "foo $/**/ /*-------------*/ /*----\n*/bar $/* \n*/baa$\n",
- "foo $/* an EOF terminates a line */",
- "foo $/* an EOF terminates a line */ /*",
- "foo $/* an EOF terminates a line */ //",
-
- "package main$\n\nfunc main() {\n\tif {\n\t\treturn /* */ }$\n}$\n",
- "package main$",
+var semicolonTests = [...]struct{ input, want string }{
+ {"", ""},
+ {"\ufeff;", ";"}, // first BOM is ignored
+ {";", ";"},
+ {"foo\n", "IDENT ;"},
+ {"123\n", "INT ;"},
+ {"1.2\n", "FLOAT ;"},
+ {"'x'\n", "CHAR ;"},
+ {`"x"` + "\n", "STRING ;"},
+ {"`x`\n", "STRING ;"},
+
+ {"+\n", "+"},
+ {"-\n", "-"},
+ {"*\n", "*"},
+ {"/\n", "/"},
+ {"%\n", "%"},
+
+ {"&\n", "&"},
+ {"|\n", "|"},
+ {"^\n", "^"},
+ {"<<\n", "<<"},
+ {">>\n", ">>"},
+ {"&^\n", "&^"},
+
+ {"+=\n", "+="},
+ {"-=\n", "-="},
+ {"*=\n", "*="},
+ {"/=\n", "/="},
+ {"%=\n", "%="},
+
+ {"&=\n", "&="},
+ {"|=\n", "|="},
+ {"^=\n", "^="},
+ {"<<=\n", "<<="},
+ {">>=\n", ">>="},
+ {"&^=\n", "&^="},
+
+ {"&&\n", "&&"},
+ {"||\n", "||"},
+ {"<-\n", "<-"},
+ {"++\n", "++ ;"},
+ {"--\n", "-- ;"},
+
+ {"==\n", "=="},
+ {"<\n", "<"},
+ {">\n", ">"},
+ {"=\n", "="},
+ {"!\n", "!"},
+
+ {"!=\n", "!="},
+ {"<=\n", "<="},
+ {">=\n", ">="},
+ {":=\n", ":="},
+ {"...\n", "..."},
+
+ {"(\n", "("},
+ {"[\n", "["},
+ {"{\n", "{"},
+ {",\n", ","},
+ {".\n", "."},
+
+ {")\n", ") ;"},
+ {"]\n", "] ;"},
+ {"}\n", "} ;"},
+ {";\n", ";"},
+ {":\n", ":"},
+
+ {"break\n", "break ;"},
+ {"case\n", "case"},
+ {"chan\n", "chan"},
+ {"const\n", "const"},
+ {"continue\n", "continue ;"},
+
+ {"default\n", "default"},
+ {"defer\n", "defer"},
+ {"else\n", "else"},
+ {"fallthrough\n", "fallthrough ;"},
+ {"for\n", "for"},
+
+ {"func\n", "func"},
+ {"go\n", "go"},
+ {"goto\n", "goto"},
+ {"if\n", "if"},
+ {"import\n", "import"},
+
+ {"interface\n", "interface"},
+ {"map\n", "map"},
+ {"package\n", "package"},
+ {"range\n", "range"},
+ {"return\n", "return ;"},
+
+ {"select\n", "select"},
+ {"struct\n", "struct"},
+ {"switch\n", "switch"},
+ {"type\n", "type"},
+ {"var\n", "var"},
+
+ {"foo//comment\n", "IDENT COMMENT ;"},
+ {"foo//comment", "IDENT COMMENT ;"},
+ {"foo/*comment*/\n", "IDENT COMMENT ;"},
+ {"foo/*\n*/", "IDENT COMMENT ;"},
+ {"foo/*comment*/ \n", "IDENT COMMENT ;"},
+ {"foo/*\n*/ ", "IDENT COMMENT ;"},
+
+ {"foo // comment\n", "IDENT COMMENT ;"},
+ {"foo // comment", "IDENT COMMENT ;"},
+ {"foo /*comment*/\n", "IDENT COMMENT ;"},
+ {"foo /*\n*/", "IDENT COMMENT ;"},
+ {"foo /* */ /* \n */ bar/**/\n", "IDENT COMMENT COMMENT ; IDENT COMMENT ;"},
+ {"foo /*0*/ /*1*/ /*2*/\n", "IDENT COMMENT COMMENT COMMENT ;"},
+
+ {"foo /*comment*/ \n", "IDENT COMMENT ;"},
+ {"foo /*0*/ /*1*/ /*2*/ \n", "IDENT COMMENT COMMENT COMMENT ;"},
+ {"foo /**/ /*-------------*/ /*----\n*/bar /* \n*/baa\n", "IDENT COMMENT COMMENT COMMENT ; IDENT COMMENT ; IDENT ;"},
+ {"foo /* an EOF terminates a line */", "IDENT COMMENT ;"},
+ {"foo /* an EOF terminates a line */ /*", "IDENT COMMENT COMMENT ;"},
+ {"foo /* an EOF terminates a line */ //", "IDENT COMMENT COMMENT ;"},
+
+ {"package main\n\nfunc main() {\n\tif {\n\t\treturn /* */ }\n}\n", "package IDENT ; func IDENT ( ) { if { return COMMENT } ; } ;"},
+ {"package main", "package IDENT ;"},
}
-func TestSemis(t *testing.T) {
- for _, line := range lines {
- checkSemi(t, line, 0)
- checkSemi(t, line, ScanComments)
+func TestSemicolons(t *testing.T) {
+ for _, test := range semicolonTests {
+ input, want := test.input, test.want
+ checkSemi(t, input, want, 0)
+ checkSemi(t, input, want, ScanComments)
// if the input ended in newlines, the input must tokenize the
// same with or without those newlines
- for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- {
- checkSemi(t, line[0:i], 0)
- checkSemi(t, line[0:i], ScanComments)
+ for i := len(input) - 1; i >= 0 && input[i] == '\n'; i-- {
+ checkSemi(t, input[0:i], want, 0)
+ checkSemi(t, input[0:i], want, ScanComments)
}
}
}