--- /dev/null
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package scanner
+
+import (
+ "container/vector";
+ "fmt";
+ "go/token";
+ "io";
+ "os";
+ "sort";
+)
+
+
+// ErrorHandler is the interface through which a Scanner reports syntax
+// errors. An implementation may be provided to the Scanner; if a syntax
+// error is encountered and a handler was installed, Error is called
+// with a position and an error message. The position points to the
+// beginning of the offending token.
+//
+type ErrorHandler interface {
+ Error(pos token.Position, msg string);
+}
+
+
+// ErrorVector implements the ErrorHandler interface. It must be
+// initialized with Init() before use (NewErrorVector does this for
+// freshly allocated values). It maintains a list of errors which can
+// be retrieved with GetErrorList and GetError.
+//
+// A common usage pattern is to embed an ErrorVector alongside a
+// scanner in a data structure that uses the scanner. By passing a
+// reference to an ErrorVector to the scanner's Init call, default
+// error handling is obtained.
+//
+type ErrorVector struct {
+ errors vector.Vector; // collected errors; each element is an *Error
+}
+
+
+// Init initializes an ErrorVector. It must be called before the
+// ErrorVector is used as an error handler; NewErrorVector calls it
+// on the caller's behalf.
+func (h *ErrorVector) Init() {
+ h.errors.Init(0);
+}
+
+
+// NewErrorVector creates a new ErrorVector.
+func NewErrorVector() *ErrorVector {
+ h := new(ErrorVector);
+ h.Init();
+ return h;
+}
+
+
+// ErrorCount reports how many errors the ErrorVector currently holds.
+func (h *ErrorVector) ErrorCount() int {
+	n := h.errors.Len();
+	return n;
+}
+
+
+// Within ErrorVector, an error is represented by an Error node. The
+// position Pos, if valid, points to the beginning of the offending
+// token, and the error condition is described by Msg.
+//
+type Error struct {
+ Pos token.Position; // position of the offending token; may be invalid
+ Msg string; // description of the error condition
+}
+
+
+func (e *Error) String() string {
+ s := e.Pos.Filename;
+ if s != "" {
+ s += ":";
+ }
+ if e.Pos.IsValid() {
+ s += fmt.Sprintf("%d:%d:", e.Pos.Line, e.Pos.Column);
+ }
+ if s != "" {
+ s += " ";
+ }
+ return s + e.Msg;
+}
+
+
+// An ErrorList is a (possibly sorted) list of Errors. It is the type
+// returned by ErrorVector.GetErrorList.
+type ErrorList []*Error
+
+
+// ErrorList implements the SortInterface so that it can be passed to
+// sort.Sort: Len reports the number of errors in the list and Swap
+// exchanges the errors at indices i and j.
+func (p ErrorList) Len() int { return len(p); }
+func (p ErrorList) Swap(i, j int) { p[i], p[j] = p[j], p[i]; }
+
+
+// Less reports whether the error at index i sorts before the error at
+// index j. Errors are ordered by filename first, then by line, then by
+// column.
+func (p ErrorList) Less(i, j int) bool {
+	a, b := &p[i].Pos, &p[j].Pos;
+	// Note that it is not sufficient to simply compare file offsets because
+	// the offsets do not reflect modified line information (through //line
+	// comments).
+	switch {
+	case a.Filename != b.Filename:
+		return a.Filename < b.Filename;
+	case a.Line != b.Line:
+		return a.Line < b.Line;
+	}
+	return a.Column < b.Column;
+}
+
+
+// String describes the list by its first error. An empty list yields
+// "unspecified error"; a list with more than one entry yields the first
+// error followed by the number of remaining errors.
+func (p ErrorList) String() string {
+	n := len(p);
+	if n == 0 {
+		return "unspecified error";
+	}
+	first := p[0].String();
+	if n == 1 {
+		return first;
+	}
+	return fmt.Sprintf("%s (and %d more errors)", first, n - 1);
+}
+
+
+// These constants control the construction of the ErrorList
+// returned by GetErrorList and GetError. Each mode includes the
+// processing of the modes listed before it.
+//
+const (
+ Raw = iota; // leave error list unchanged
+ Sorted; // sort error list by file, line, and column number
+ NoMultiples; // sort error list and leave only the first error per line
+)
+
+
+// GetErrorList returns the list of errors collected by an ErrorVector.
+// The construction of the ErrorList returned is controlled by the mode
+// parameter (Raw, Sorted, or NoMultiples). If there are no errors, the
+// result is nil.
+//
+// In NoMultiples mode the first error of the sorted list is always
+// kept, even if it carries no position information, so that a non-empty
+// ErrorVector never yields an empty list.
+//
+func (h *ErrorVector) GetErrorList(mode int) ErrorList {
+	n := h.errors.Len();
+	if n == 0 {
+		return nil;
+	}
+
+	list := make(ErrorList, n);
+	for i := 0; i < n; i++ {
+		list[i] = h.errors.At(i).(*Error);
+	}
+
+	if mode >= Sorted {
+		sort.Sort(list);
+	}
+
+	if mode >= NoMultiples {
+		// Compact the sorted list in place, keeping only the first error
+		// for each (filename, line) pair. The first element is kept
+		// unconditionally instead of being compared against a zero
+		// Position: that comparison would silently drop errors that have
+		// an empty filename and line 0.
+		var last token.Position;
+		i := 0;
+		for _, e := range list {
+			if i == 0 || e.Pos.Filename != last.Filename || e.Pos.Line != last.Line {
+				last = e.Pos;
+				list[i] = e;
+				i++;
+			}
+		}
+		list = list[0 : i];
+	}
+
+	return list;
+}
+
+
+// GetError is like GetErrorList, but its result type is os.Error.
+// When no errors were collected it returns a literal nil, so that the
+// result assigned to an os.Error variable compares equal to nil (a nil
+// ErrorList wrapped in an os.Error would not).
+//
+func (h *ErrorVector) GetError(mode int) os.Error {
+	if h.errors.Len() > 0 {
+		return h.GetErrorList(mode);
+	}
+	return nil;
+}
+
+
+// Error records an error with the given position and message. It is
+// the method through which ErrorVector implements the ErrorHandler
+// interface.
+func (h *ErrorVector) Error(pos token.Position, msg string) {
+	e := &Error{pos, msg};
+	h.errors.Push(e);
+}
+
+
+// PrintError is a utility function that writes err to w. If err is an
+// ErrorList, each of its errors is printed on a line of its own;
+// otherwise the err string is printed on a single line.
+//
+func PrintError(w io.Writer, err os.Error) {
+	list, isList := err.(ErrorList);
+	if !isList {
+		fmt.Fprintf(w, "%s\n", err);
+		return;
+	}
+	for _, e := range list {
+		fmt.Fprintf(w, "%s\n", e);
+	}
+}
package scanner
import (
+ "bytes";
"go/token";
+ "go/scanner";
"strconv";
"unicode";
"utf8";
)
-// An implementation of an ErrorHandler may be provided to the Scanner.
-// If a syntax error is encountered and a handler was installed, Error
-// is called with a position and an error message. The position points
-// to the beginning of the offending token.
-//
-type ErrorHandler interface {
- Error(pos token.Position, msg string);
-}
-
-
// A Scanner holds the scanner's internal state while processing
// a given text. It can be allocated as part of another data
// structure but must be initialized via Init before use. For
// Init prepares the scanner S to tokenize the text src. Calls to Scan
// will use the error handler err if they encounter a syntax error and
// err is not nil. Also, for each error encountered, the Scanner field
-// ErrorCount is incremented by one. The mode parameter determines how
-// comments and illegal characters are handled.
+// ErrorCount is incremented by one. The filename parameter is used as
+// filename in the token.Position returned by Scan for each token. The
+// mode parameter determines how comments and illegal characters are
+// handled.
//
-func (S *Scanner) Init(src []byte, err ErrorHandler, mode uint) {
+func (S *Scanner) Init(filename string, src []byte, err ErrorHandler, mode uint) {
// Explicitly initialize all fields since a scanner may be reused.
S.src = src;
S.err = err;
S.mode = mode;
- S.pos = token.Position{0, 1, 0};
+ S.pos = token.Position{filename, 0, 1, 0};
S.offset = 0;
S.ErrorCount = 0;
S.next();
}
+var prefix = []byte{'l', 'i', 'n', 'e', ' '}; // "line ": the text that must directly follow "//" in a //line comment
+
func (S *Scanner) scanComment(pos token.Position) {
// first '/' already consumed
if S.ch == '\n' {
// '\n' is not part of the comment
// (the comment ends on the same line where it started)
+ if pos.Column == 1 {
+ text := S.src[pos.Offset+2 : S.pos.Offset];
+ if bytes.HasPrefix(text, prefix) {
+ // comment starts at beginning of line with "//line ";
+ // get filename and line number, if any
+ i := bytes.Index(text, []byte{':'});
+ if i >= 0 {
+ if line, err := strconv.Atoi(string(text[i+1 : len(text)])); err == nil && line > 0 {
+ // valid //line filename:line comment;
+ // update scanner position
+ S.pos.Filename = string(text[len(prefix) : i]);
+ S.pos.Line = line;
+ }
+ }
+ }
+ }
return;
}
}
// false (usually when the token value is token.EOF). The result is the number
// of errors encountered.
//
-func Tokenize(src []byte, err ErrorHandler, mode uint, f func (pos token.Position, tok token.Token, lit []byte) bool) int {
+func Tokenize(filename string, src []byte, err ErrorHandler, mode uint, f func (pos token.Position, tok token.Token, lit []byte) bool) int {
var s Scanner;
- s.Init(src, err, mode);
+ s.Init(filename, src, err, mode);
for f(s.Scan()) {
// action happens in f
}
import (
"go/scanner";
"go/token";
+ "os";
"strings";
"testing";
)
}
+// checkPos reports a test error through t for each field (filename,
+// offset, line, column) in which pos differs from expected. The token
+// text lit is used to identify the token in the error messages.
+func checkPos(t *testing.T, lit string, pos, expected token.Position) {
+	if got, want := pos.Filename, expected.Filename; got != want {
+		t.Errorf("bad filename for %s: got %s, expected %s", lit, got, want);
+	}
+	if got, want := pos.Offset, expected.Offset; got != want {
+		t.Errorf("bad position for %s: got %d, expected %d", lit, got, want);
+	}
+	if got, want := pos.Line, expected.Line; got != want {
+		t.Errorf("bad line for %s: got %d, expected %d", lit, got, want);
+	}
+	if got, want := pos.Column, expected.Column; got != want {
+		t.Errorf("bad column for %s: got %d, expected %d", lit, got, want);
+	}
+}
+
+
// Verify that calling Scan() provides the correct results.
func TestScan(t *testing.T) {
// make source
var src string;
- for i, e := range tokens {
+ for _, e := range tokens {
src += e.lit + whitespace;
}
whitespace_linecount := NewlineCount(whitespace);
// verify scan
index := 0;
- eloc := token.Position{0, 1, 1};
- nerrors := scanner.Tokenize(strings.Bytes(src), &TestErrorHandler{t}, scanner.ScanComments,
+ epos := token.Position{"", 0, 1, 1};
+ nerrors := scanner.Tokenize("", strings.Bytes(src), &TestErrorHandler{t}, scanner.ScanComments,
func (pos token.Position, tok token.Token, litb []byte) bool {
e := elt{token.EOF, "", special};
if index < len(tokens) {
lit := string(litb);
if tok == token.EOF {
lit = "<EOF>";
- eloc.Column = 0;
- }
- if pos.Offset != eloc.Offset {
- t.Errorf("bad position for %s: got %d, expected %d", lit, pos.Offset, eloc.Offset);
- }
- if pos.Line != eloc.Line {
- t.Errorf("bad line for %s: got %d, expected %d", lit, pos.Line, eloc.Line);
- }
- if pos.Column!= eloc.Column {
- t.Errorf("bad column for %s: got %d, expected %d", lit, pos.Column, eloc.Column);
+ epos.Column = 0;
}
+ checkPos(t, lit, pos, epos);
if tok != e.tok {
t.Errorf("bad token for %s: got %s, expected %s", lit, tok.String(), e.tok.String());
}
if tokenclass(tok) != e.class {
t.Errorf("bad class for %s: got %d, expected %d", lit, tokenclass(tok), e.class);
}
- eloc.Offset += len(lit) + len(whitespace);
- eloc.Line += NewlineCount(lit) + whitespace_linecount;
+ epos.Offset += len(lit) + len(whitespace);
+ epos.Line += NewlineCount(lit) + whitespace_linecount;
if tok == token.COMMENT && litb[1] == '/' {
// correct for unaccounted '/n' in //-style comment
- eloc.Offset++;
- eloc.Line++;
+ epos.Offset++;
+ epos.Line++;
}
index++;
return tok != token.EOF;
}
+// seg is one entry of the TestLineComments table: a piece of source
+// text together with the position expected for the token it contains.
+type seg struct {
+ srcline string; // source text of the segment (may span several lines)
+ filename string; // filename expected for the segment's token
+ line int; // line number expected for the segment's token
+}
+
+
+// segments is the input table for TestLineComments. The srcline fields
+// are concatenated to form the source text; filename and line give the
+// position expected for the single token of each segment.
+var segments = []seg{
+ // exactly one token per line since the test consumes one token per segment
+ seg{ " line1", "TestLineComments", 1 },
+ seg{ "\nline2", "TestLineComments", 2 },
+ seg{ "\nline3 //line File1.go:100", "TestLineComments", 3 }, // bad line comment, ignored
+ seg{ "\nline4", "TestLineComments", 4 },
+ seg{ "\n//line File1.go:100\n line100", "File1.go", 100 },
+ seg{ "\n//line File2.go:200\n line200", "File2.go", 200 },
+ seg{ "\n//line :1\n line1", "", 1 },
+ seg{ "\n//line foo:42\n line42", "foo", 42 },
+ seg{ "\n //line foo:42\n line44", "foo", 44 }, // bad line comment, ignored
+ seg{ "\n//line foo 42\n line46", "foo", 46 }, // bad line comment, ignored
+ seg{ "\n//line foo:42 extra text\n line48", "foo", 48 }, // bad line comment, ignored
+ seg{ "\n//line foo:42\n line42", "foo", 42 },
+ seg{ "\n//line foo:42\n line42", "foo", 42 },
+ seg{ "\n//line File1.go:100\n line100", "File1.go", 100 },
+}
+
+
+// Verify that comments of the form "//line filename:line" are interpreted correctly.
+// The source is the concatenation of all segments; one token is scanned
+// per segment and its filename and line are compared with the values
+// recorded in the segment.
+func TestLineComments(t *testing.T) {
+	// make source
+	var src string;
+	for _, e := range segments {
+		src += e.srcline;
+	}
+
+	// verify scan
+	var S scanner.Scanner;
+	S.Init("TestLineComments", strings.Bytes(src), nil, 0);
+	for _, s := range segments {
+		// the kind of token is irrelevant here; only its position is checked
+		pos, _, lit := S.Scan();
+		// only filename and line are under test: offset and column are
+		// taken from pos itself so that checkPos never flags them
+		checkPos(t, string(lit), pos, token.Position{s.filename, pos.Offset, s.line, pos.Column});
+	}
+
+	if S.ErrorCount != 0 {
+		t.Errorf("found %d errors", S.ErrorCount);
+	}
+}
+
+
// Verify that initializing the same scanner more then once works correctly.
func TestInit(t *testing.T) {
var s scanner.Scanner;
// 1st init
- s.Init(strings.Bytes("if true { }"), nil, 0);
+ s.Init("", strings.Bytes("if true { }"), nil, 0);
s.Scan(); // if
s.Scan(); // true
pos, tok, lit := s.Scan(); // {
}
// 2nd init
- s.Init(strings.Bytes("go true { ]"), nil, 0);
+ s.Init("", strings.Bytes("go true { ]"), nil, 0);
pos, tok, lit = s.Scan(); // go
if tok != token.GO {
t.Errorf("bad token: got %s, expected %s", tok.String(), token.GO);
var s scanner.Scanner;
const src = "*?*$*@*";
- s.Init(strings.Bytes(src), &TestErrorHandler{t}, scanner.AllowIllegalChars);
+ s.Init("", strings.Bytes(src), &TestErrorHandler{t}, scanner.AllowIllegalChars);
for offs, ch := range src {
pos, tok, lit := s.Scan();
if pos.Offset != offs {
t.Errorf("found %d errors", s.ErrorCount);
}
}
+
+
+// TestStdErrorHander verifies that an ErrorVector installed as the
+// error handler collects the errors reported while tokenizing src, and
+// that GetErrorList returns the expected number of errors in Raw,
+// Sorted, and NoMultiples mode. It also checks that the number of
+// collected errors matches the error count returned by Tokenize.
+func TestStdErrorHander(t *testing.T) {
+	const src =
+		"@\n" // illegal character, cause an error
+		"@ @\n" // two errors on the same line
+		"//line File2:20\n"
+		"@\n" // different file, but same line
+		"//line File2:1\n"
+		"@ @\n" // same file, decreasing line number
+		"//line File1:1\n"
+		"@ @ @" // original file, line 1 again
+		;
+
+	v := NewErrorVector();
+	nerrors := scanner.Tokenize("File1", strings.Bytes(src), v, 0,
+		func (pos token.Position, tok token.Token, litb []byte) bool {
+			return tok != token.EOF;
+		}
+	);
+
+	list := v.GetErrorList(Raw);
+	if len(list) != 9 {
+		t.Errorf("found %d raw errors, expected 9", len(list));
+		PrintError(os.Stderr, list);
+	}
+
+	list = v.GetErrorList(Sorted);
+	if len(list) != 9 {
+		t.Errorf("found %d sorted errors, expected 9", len(list));
+		PrintError(os.Stderr, list);
+	}
+
+	list = v.GetErrorList(NoMultiples);
+	if len(list) != 4 {
+		t.Errorf("found %d one-per-line errors, expected 4", len(list));
+		PrintError(os.Stderr, list);
+	}
+
+	if v.ErrorCount() != nerrors {
+		t.Errorf("found %d errors, expected %d", v.ErrorCount(), nerrors);
+	}
+}