- added Filename field to token.Position

author Robert Griesemer <gri@golang.org>

Tue, 14 Jul 2009 17:44:57 +0000 (10:44 -0700)

committer Robert Griesemer <gri@golang.org>

Tue, 14 Jul 2009 17:44:57 +0000 (10:44 -0700)
author Robert Griesemer <gri@golang.org>
Tue, 14 Jul 2009 17:44:57 +0000 (10:44 -0700)
committer Robert Griesemer <gri@golang.org>
Tue, 14 Jul 2009 17:44:57 +0000 (10:44 -0700)
diff --git a/src/pkg/go/scanner/Makefile b/src/pkg/go/scanner/Makefile

index d47fecb7c1f0a7a58eb1b2ebc26862d1e3b823d2..7845fe5a7d86713a619d0a9da980ff5a8cfd0bcf 100644 (file)
--- a/src/pkg/go/scanner/Makefile
+++ b/src/pkg/go/scanner/Makefile
@@ -2,8 +2,9 @@
  # Use of this source code is governed by a BSD-style
  # license that can be found in the LICENSE file.
  
+
  # DO NOT EDIT.  Automatically generated by gobuild.
-# gobuild -m >Makefile
+# gobuild -m scanner.go errors.go >Makefile
  
  D=/go/
  
@@ -20,7 +21,7 @@ test: packages
  
  coverage: packages
         gotest
-       6cov -g `pwd` | grep -v '_test\.go:'
+       6cov -g $$(pwd) | grep -v '_test\.go:'
  
  %.$O: %.go
         $(GC) -I_obj $*.go
@@ -32,16 +33,23 @@ coverage: packages
         $(AS) $*.s
  
  O1=\
+       errors.$O\
+
+O2=\
         scanner.$O\
  
  
-phases: a1
+phases: a1 a2
  _obj$D/scanner.a: phases
  
  a1: $(O1)
-       $(AR) grc _obj$D/scanner.a scanner.$O
+       $(AR) grc _obj$D/scanner.a errors.$O
         rm -f $(O1)
  
+a2: $(O2)
+       $(AR) grc _obj$D/scanner.a scanner.$O
+       rm -f $(O2)
+
  
  newpkg: clean
         mkdir -p _obj$D
@@ -49,6 +57,7 @@ newpkg: clean
  
  $(O1): newpkg
  $(O2): a1
+$(O3): a2
  
  nuke: clean
         rm -f $(GOROOT)/pkg/$(GOOS)_$(GOARCH)$D/scanner.a
diff --git a/src/pkg/go/scanner/errors.go b/src/pkg/go/scanner/errors.go

new file mode 100644 (file)

index 0000000..54770f0
--- /dev/null
+++ b/src/pkg/go/scanner/errors.go
@@ -0,0 +1,203 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package scanner
+
+import (
+       "container/vector";
+       "fmt";
+       "go/token";
+       "io";
+       "os";
+       "sort";
+)
+
+
+// An implementation of an ErrorHandler may be provided to the Scanner.
+// If a syntax error is encountered and a handler was installed, Error
+// is called with a position and an error message. The position points
+// to the beginning of the offending token.
+//
+type ErrorHandler interface {
+       Error(pos token.Position, msg string);
+}
+
+
+// ErrorVector implements the ErrorHandler interface. It must be
+// initialized with Init(). It maintains a list of errors which can
+// be retrieved with GetErrorList and GetError.
+//
+// A common usage pattern is to embed an ErrorVector alongside a
+// scanner in a data structure that uses the scanner. By passing a
+// reference to an ErrorVector to the scanner's Init call, default
+// error handling is obtained.
+//
+type ErrorVector struct {
+       errors vector.Vector;
+}
+
+
+// Init initializes an ErrorVector.
+func (h *ErrorVector) Init() {
+       h.errors.Init(0);
+}
+
+
+// NewErrorVector creates a new ErrorVector.
+func NewErrorVector() *ErrorVector {
+       h := new(ErrorVector);
+       h.Init();
+       return h;
+}
+
+
+// ErrorCount returns the number of errors collected.
+func (h *ErrorVector) ErrorCount() int {
+       return h.errors.Len();
+}
+
+
+// Within ErrorVector, an error is represented by an Error node. The
+// position Pos, if valid, points to the beginning of the offending
+// token, and the error condition is described by Msg.
+//
+type Error struct {
+       Pos token.Position;
+       Msg string;
+}
+
+
+func (e *Error) String() string {
+       s := e.Pos.Filename;
+       if s != "" {
+               s += ":";
+       }
+       if e.Pos.IsValid() {
+               s += fmt.Sprintf("%d:%d:", e.Pos.Line, e.Pos.Column);
+       }
+       if s != "" {
+               s += " ";
+       }
+       return s + e.Msg;
+}
+
+
+// An ErrorList is a (possibly sorted) list of Errors.
+type ErrorList []*Error
+
+
+// ErrorList implements the SortInterface.
+func (p ErrorList) Len() int  { return len(p); }
+func (p ErrorList) Swap(i, j int)  { p[i], p[j] = p[j], p[i]; }
+
+
+func (p ErrorList) Less(i, j int) bool  {
+       e := &p[i].Pos;
+       f := &p[j].Pos;
+       // Note that it is not sufficient to simply compare file offsets because
+       // the offsets do not reflect modified line information (through //line
+       // comments).
+       if e.Filename < f.Filename {
+               return true;
+       }
+       if e.Filename == f.Filename {
+               if e.Line < f.Line {
+                       return true;
+               }
+               if e.Line == f.Line {
+                       return e.Column < f.Column;
+               }
+       }
+       return false;
+}
+
+
+func (p ErrorList) String() string {
+       switch len(p) {
+       case 0:
+               return "unspecified error";
+       case 1:
+               return p[0].String();
+       }
+       return fmt.Sprintf("%s (and %d more errors)", p[0].String(), len(p) - 1);
+}
+
+
+// These constants control the construction of the ErrorList
+// returned by GetErrorList.
+//
+const (
+       Raw = iota;  // leave error list unchanged
+       Sorted;  // sort error list by file, line, and column number
+       NoMultiples;  // sort error list and leave only the first error per line
+)
+
+
+// GetErrorList returns the list of errors collected by an ErrorVector.
+// The construction of the ErrorList returned is controlled by the mode
+// parameter. If there are no errors, the result is nil.
+//
+func (h *ErrorVector) GetErrorList(mode int) ErrorList {
+       if h.errors.Len() == 0 {
+               return nil;
+       }
+
+       list := make(ErrorList, h.errors.Len());
+       for i := 0; i < h.errors.Len(); i++ {
+               list[i] = h.errors.At(i).(*Error);
+       }
+
+       if mode >= Sorted {
+               sort.Sort(list);
+       }
+
+       if mode >= NoMultiples {
+               var last token.Position;  // initial last.Line is != any legal error line
+               i := 0;
+               for _, e := range list {
+                       if e.Pos.Filename != last.Filename || e.Pos.Line != last.Line {
+                               last = e.Pos;
+                               list[i] = e;
+                               i++;
+                       }
+               }
+               list = list[0 : i];
+       }
+
+       return list;
+}
+
+
+// GetError is like GetErrorList, but it returns an os.Error instead
+// so that a nil result can be assigned to an os.Error variable and
+// remains nil.
+//
+func (h *ErrorVector) GetError(mode int) os.Error {
+       if h.errors.Len() == 0 {
+               return nil;
+       }
+
+       return h.GetErrorList(mode);
+}
+
+
+// ErrorVector implements the ErrorHandler interface.
+func (h *ErrorVector) Error(pos token.Position, msg string) {
+       h.errors.Push(&Error{pos, msg});
+}
+
+
+// PrintError is a utility function that prints a list of errors to w,
+// one error per line, if the err parameter is an ErrorList. Otherwise
+// it prints the err string.
+//
+func PrintError(w io.Writer, err os.Error) {
+       if list, ok := err.(ErrorList); ok {
+               for _, e := range list {
+                       fmt.Fprintf(w, "%s\n", e);
+               }
+       } else {
+               fmt.Fprintf(w, "%s\n", err);
+       }
+}
diff --git a/src/pkg/go/scanner/scanner.go b/src/pkg/go/scanner/scanner.go

index 795d56f8ba18ce0e2b5379821ce5de642abe1b4b..3a2d98514435ba422b491961939b1d6d548c4dec 100644 (file)
--- a/src/pkg/go/scanner/scanner.go
+++ b/src/pkg/go/scanner/scanner.go
@@ -9,23 +9,15 @@
  package scanner
  
  import (
+       "bytes";
         "go/token";
+       "go/scanner";
         "strconv";
         "unicode";
         "utf8";
  )
  
  
-// An implementation of an ErrorHandler may be provided to the Scanner.
-// If a syntax error is encountered and a handler was installed, Error
-// is called with a position and an error message. The position points
-// to the beginning of the offending token.
-//
-type ErrorHandler interface {
-       Error(pos token.Position, msg string);
-}
-
-
  // A Scanner holds the scanner's internal state while processing
  // a given text.  It can be allocated as part of another data
  // structure but must be initialized via Init before use. For
@@ -84,15 +76,17 @@ const (
  // Init prepares the scanner S to tokenize the text src. Calls to Scan
  // will use the error handler err if they encounter a syntax error and
  // err is not nil. Also, for each error encountered, the Scanner field
-// ErrorCount is incremented by one. The mode parameter determines how
-// comments and illegal characters are handled.
+// ErrorCount is incremented by one. The filename parameter is used as
+// filename in the token.Position returned by Scan for each token. The
+// mode parameter determines how comments and illegal characters are
+// handled.
  //
-func (S *Scanner) Init(src []byte, err ErrorHandler, mode uint) {
+func (S *Scanner) Init(filename string, src []byte, err ErrorHandler, mode uint) {
         // Explicitly initialize all fields since a scanner may be reused.
         S.src = src;
         S.err = err;
         S.mode = mode;
-       S.pos = token.Position{0, 1, 0};
+       S.pos = token.Position{filename, 0, 1, 0};
         S.offset = 0;
         S.ErrorCount = 0;
         S.next();
@@ -133,6 +127,8 @@ func (S *Scanner) expect(ch int) {
  }
  
  
+var prefix = []byte{'l', 'i', 'n', 'e', ' '};  // "line "
+
  func (S *Scanner) scanComment(pos token.Position) {
         // first '/' already consumed
  
@@ -143,6 +139,22 @@ func (S *Scanner) scanComment(pos token.Position) {
                         if S.ch == '\n' {
                                 // '\n' is not part of the comment
                                 // (the comment ends on the same line where it started)
+                               if pos.Column == 1 {
+                                       text := S.src[pos.Offset+2 : S.pos.Offset];
+                                       if bytes.HasPrefix(text, prefix) {
+                                               // comment starts at beginning of line with "//line ";
+                                               // get filename and line number, if any
+                                               i := bytes.Index(text, []byte{':'});
+                                               if i >= 0 {
+                                                       if line, err := strconv.Atoi(string(text[i+1 : len(text)])); err == nil && line > 0 {
+                                                               // valid //line filename:line comment;
+                                                               // update scanner position
+                                                               S.pos.Filename = string(text[len(prefix) : i]);
+                                                               S.pos.Line = line;
+                                                       }
+                                               }
+                                       }
+                               }
                                 return;
                         }
                 }
@@ -492,9 +504,9 @@ scan_again:
  // false (usually when the token value is token.EOF). The result is the number
  // of errors encountered.
  //
-func Tokenize(src []byte, err ErrorHandler, mode uint, f func (pos token.Position, tok token.Token, lit []byte) bool) int {
+func Tokenize(filename string, src []byte, err ErrorHandler, mode uint, f func (pos token.Position, tok token.Token, lit []byte) bool) int {
         var s Scanner;
-       s.Init(src, err, mode);
+       s.Init(filename, src, err, mode);
         for f(s.Scan()) {
                 // action happens in f
         }
diff --git a/src/pkg/go/scanner/scanner_test.go b/src/pkg/go/scanner/scanner_test.go

index 18dae19cc225e681f9a91daedd39b6474b4cd63d..0cb200b48fae175ceddfbb3de2e84cb3d3f029e2 100644 (file)
--- a/src/pkg/go/scanner/scanner_test.go
+++ b/src/pkg/go/scanner/scanner_test.go
@@ -7,6 +7,7 @@ package scanner
  import (
         "go/scanner";
         "go/token";
+       "os";
         "strings";
         "testing";
  )
@@ -178,19 +179,35 @@ func NewlineCount(s string) int {
  }
  
  
+func checkPos(t *testing.T, lit string, pos, expected token.Position) {
+       if pos.Filename != expected.Filename {
+               t.Errorf("bad filename for %s: got %s, expected %s", lit, pos.Filename, expected.Filename);
+       }
+       if pos.Offset != expected.Offset {
+               t.Errorf("bad position for %s: got %d, expected %d", lit, pos.Offset, expected.Offset);
+       }
+       if pos.Line != expected.Line {
+               t.Errorf("bad line for %s: got %d, expected %d", lit, pos.Line, expected.Line);
+       }
+       if pos.Column!= expected.Column {
+               t.Errorf("bad column for %s: got %d, expected %d", lit, pos.Column, expected.Column);
+       }
+}
+
+
  // Verify that calling Scan() provides the correct results.
  func TestScan(t *testing.T) {
         // make source
         var src string;
-       for i, e := range tokens {
+       for _, e := range tokens {
                 src += e.lit + whitespace;
         }
         whitespace_linecount := NewlineCount(whitespace);
  
         // verify scan
         index := 0;
-       eloc := token.Position{0, 1, 1};
-       nerrors := scanner.Tokenize(strings.Bytes(src), &TestErrorHandler{t}, scanner.ScanComments,
+       epos := token.Position{"", 0, 1, 1};
+       nerrors := scanner.Tokenize("", strings.Bytes(src), &TestErrorHandler{t}, scanner.ScanComments,
                 func (pos token.Position, tok token.Token, litb []byte) bool {
                         e := elt{token.EOF, "", special};
                         if index < len(tokens) {
@@ -199,17 +216,9 @@ func TestScan(t *testing.T) {
                         lit := string(litb);
                         if tok == token.EOF {
                                 lit = "<EOF>";
-                               eloc.Column = 0;
-                       }
-                       if pos.Offset != eloc.Offset {
-                               t.Errorf("bad position for %s: got %d, expected %d", lit, pos.Offset, eloc.Offset);
-                       }
-                       if pos.Line != eloc.Line {
-                               t.Errorf("bad line for %s: got %d, expected %d", lit, pos.Line, eloc.Line);
-                       }
-                       if pos.Column!= eloc.Column {
-                               t.Errorf("bad column for %s: got %d, expected %d", lit, pos.Column, eloc.Column);
+                               epos.Column = 0;
                         }
+                       checkPos(t, lit, pos, epos);
                         if tok != e.tok {
                                 t.Errorf("bad token for %s: got %s, expected %s", lit, tok.String(), e.tok.String());
                         }
@@ -219,12 +228,12 @@ func TestScan(t *testing.T) {
                         if tokenclass(tok) != e.class {
                                 t.Errorf("bad class for %s: got %d, expected %d", lit, tokenclass(tok), e.class);
                         }
-                       eloc.Offset += len(lit) + len(whitespace);
-                       eloc.Line += NewlineCount(lit) + whitespace_linecount;
+                       epos.Offset += len(lit) + len(whitespace);
+                       epos.Line += NewlineCount(lit) + whitespace_linecount;
                         if tok == token.COMMENT && litb[1] == '/' {
                                 // correct for unaccounted '/n' in //-style comment
-                               eloc.Offset++;
-                               eloc.Line++;
+                               epos.Offset++;
+                               epos.Line++;
                         }
                         index++;
                         return tok != token.EOF;
@@ -236,12 +245,60 @@ func TestScan(t *testing.T) {
  }
  
  
+type seg struct {
+       srcline string;  // a line of source text
+       filename string;  // filename for current token
+       line int;  // line number for current token
+}
+
+
+var segments = []seg{
+       // exactly one token per line since the test consumes one token per segment
+       seg{ "  line1", "TestLineComments", 1 },
+       seg{ "\nline2", "TestLineComments", 2 },
+       seg{ "\nline3  //line File1.go:100", "TestLineComments", 3 },  // bad line comment, ignored
+       seg{ "\nline4", "TestLineComments", 4 },
+       seg{ "\n//line File1.go:100\n  line100", "File1.go", 100 },
+       seg{ "\n//line File2.go:200\n  line200", "File2.go", 200 },
+       seg{ "\n//line :1\n  line1", "", 1 },
+       seg{ "\n//line foo:42\n  line42", "foo", 42 },
+       seg{ "\n //line foo:42\n  line44", "foo", 44 },  // bad line comment, ignored
+       seg{ "\n//line foo 42\n  line46", "foo", 46 },  // bad line comment, ignored
+       seg{ "\n//line foo:42 extra text\n  line48", "foo", 48 },  // bad line comment, ignored
+       seg{ "\n//line foo:42\n  line42", "foo", 42 },
+       seg{ "\n//line foo:42\n  line42", "foo", 42 },
+       seg{ "\n//line File1.go:100\n  line100", "File1.go", 100 },
+}
+
+
+// Verify that comments of the form "//line filename:line" are interpreted correctly.
+func TestLineComments(t *testing.T) {
+       // make source
+       var src string;
+       for _, e := range segments {
+               src += e.srcline;
+       }
+
+       // verify scan
+       var S scanner.Scanner;
+       S.Init("TestLineComments", strings.Bytes(src), nil, 0);
+       for _, s := range segments {
+               pos, tok, lit := S.Scan();
+               checkPos(t, string(lit), pos, token.Position{s.filename, pos.Offset, s.line, pos.Column});
+       }
+
+       if S.ErrorCount != 0 {
+               t.Errorf("found %d errors", S.ErrorCount);
+       }
+}
+
+
  // Verify that initializing the same scanner more then once works correctly.
  func TestInit(t *testing.T) {
         var s scanner.Scanner;
  
         // 1st init
-       s.Init(strings.Bytes("if true { }"), nil, 0);
+       s.Init("", strings.Bytes("if true { }"), nil, 0);
         s.Scan();  // if
         s.Scan();  // true
         pos, tok, lit := s.Scan();  // {
@@ -250,7 +307,7 @@ func TestInit(t *testing.T) {
         }
  
         // 2nd init
-       s.Init(strings.Bytes("go true { ]"), nil, 0);
+       s.Init("", strings.Bytes("go true { ]"), nil, 0);
         pos, tok, lit = s.Scan();  // go
         if tok != token.GO {
                 t.Errorf("bad token: got %s, expected %s", tok.String(), token.GO);
@@ -266,7 +323,7 @@ func TestIllegalChars(t *testing.T) {
         var s scanner.Scanner;
  
         const src = "*?*$*@*";
-       s.Init(strings.Bytes(src), &TestErrorHandler{t}, scanner.AllowIllegalChars);
+       s.Init("", strings.Bytes(src), &TestErrorHandler{t}, scanner.AllowIllegalChars);
         for offs, ch := range src {
                 pos, tok, lit := s.Scan();
                 if pos.Offset != offs {
@@ -281,3 +338,47 @@ func TestIllegalChars(t *testing.T) {
                 t.Errorf("found %d errors", s.ErrorCount);
         }
  }
+
+
+func TestStdErrorHander(t *testing.T) {
+       const src =
+               "@\n"  // illegal character, causes an error
+               "@ @\n"  // two errors on the same line
+               "//line File2:20\n"
+               "@\n"  // different file, but same line
+               "//line File2:1\n"
+               "@ @\n"  // same file, decreasing line number
+               "//line File1:1\n"
+               "@ @ @"  // original file, line 1 again
+       ;
+
+       var s scanner.Scanner;
+       v := NewErrorVector();
+       nerrors := scanner.Tokenize("File1", strings.Bytes(src), v, 0,
+               func (pos token.Position, tok token.Token, litb []byte) bool {
+                       return tok != token.EOF;
+               }
+       );
+
+       list := v.GetErrorList(Raw);
+       if len(list) != 9 {
+               t.Errorf("found %d raw errors, expected 9", len(list));
+               PrintError(os.Stderr, list);
+       }
+
+       list = v.GetErrorList(Sorted);
+       if len(list) != 9 {
+               t.Errorf("found %d sorted errors, expected 9", len(list));
+               PrintError(os.Stderr, list);
+       }
+
+       list = v.GetErrorList(NoMultiples);
+       if len(list) != 4 {
+               t.Errorf("found %d one-per-line errors, expected 4", len(list));
+               PrintError(os.Stderr, list);
+       }
+
+       if v.ErrorCount() != nerrors {
+               t.Errorf("found %d errors, expected %d", v.ErrorCount(), nerrors);
+       }
+}
diff --git a/src/pkg/go/token/token.go b/src/pkg/go/token/token.go

index a70a75a54023dcf3964eba2858456cad28e919d9..3197b6637c2c7b4a7e9badb8a63bd308220d6dfb 100644 (file)
--- a/src/pkg/go/token/token.go
+++ b/src/pkg/go/token/token.go
@@ -327,6 +327,7 @@ func (tok Token) IsKeyword() bool {
  // A Position is valid if the line number is > 0.
  //
  type Position struct {
+       Filename string;  // filename, if any
         Offset int;  // byte offset, starting at 0
         Line int;  // line number, starting at 1
         Column int;  // column number, starting at 1 (character count)
author	Robert Griesemer <gri@golang.org>
	Tue, 14 Jul 2009 17:44:57 +0000 (10:44 -0700)
committer	Robert Griesemer <gri@golang.org>
	Tue, 14 Jul 2009 17:44:57 +0000 (10:44 -0700)
src/pkg/go/scanner/Makefile		patch \| blob \| history
src/pkg/go/scanner/errors.go	[new file with mode: 0644]	patch \| blob
src/pkg/go/scanner/scanner.go		patch \| blob \| history
src/pkg/go/scanner/scanner_test.go		patch \| blob \| history
src/pkg/go/token/token.go		patch \| blob \| history