text/scanner: provide facility for custom identifiers

author Robert Griesemer <gri@golang.org>

Mon, 16 Jun 2014 23:32:47 +0000 (16:32 -0700)

committer Robert Griesemer <gri@golang.org>

Mon, 16 Jun 2014 23:32:47 +0000 (16:32 -0700)
author Robert Griesemer <gri@golang.org>
Mon, 16 Jun 2014 23:32:47 +0000 (16:32 -0700)
committer Robert Griesemer <gri@golang.org>
Mon, 16 Jun 2014 23:32:47 +0000 (16:32 -0700)
diff --git a/src/pkg/text/scanner/scanner.go b/src/pkg/text/scanner/scanner.go

index db7ca73c68da2459af8f772d16fb6523c177bbba..e7cdd33366a2f95706922612d8f13f416d9076bd 100644 (file)
--- a/src/pkg/text/scanner/scanner.go
+++ b/src/pkg/text/scanner/scanner.go
@@ -11,7 +11,7 @@
  // By default, a Scanner skips white space and Go comments and recognizes all
  // literals as defined by the Go language specification.  It may be
  // customized to recognize only a subset of those literals and to recognize
-// different white space characters.
+// different identifier and white space characters.
  //
  // Basic usage pattern:
  //
@@ -34,8 +34,6 @@ import (
         "unicode/utf8"
  )
  
-// TODO(gri): Consider changing this to use the new (token) Position package.
-
  // A source position is represented by a Position value.
  // A position is valid if Line > 0.
  type Position struct {
@@ -164,6 +162,13 @@ type Scanner struct {
         // for values ch > ' '). The field may be changed at any time.
         Whitespace uint64
  
+       // IsIdentRune is a predicate controlling the characters accepted
+       // as the ith rune in an identifier. The set of valid characters
+       // must not intersect with the set of white space characters.
+       // If no IsIdentRune function is set, regular Go identifiers are
+       // accepted instead. The field may be changed at any time.
+       IsIdentRune func(ch rune, i int) bool
+
         // Start position of most recently scanned token; set by Scan.
         // Calling Init or Next invalidates the position (Line == 0).
         // The Filename field is always left untouched by the Scanner.
@@ -334,9 +339,17 @@ func (s *Scanner) error(msg string) {
         fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
  }
  
+func (s *Scanner) isIdentRune(ch rune, i int) bool {
+       if s.IsIdentRune != nil {
+               return s.IsIdentRune(ch, i)
+       }
+       return ch == '_' || unicode.IsLetter(ch) || unicode.IsDigit(ch) && i > 0
+}
+
  func (s *Scanner) scanIdentifier() rune {
-       ch := s.next() // read character after first '_' or letter
-       for ch == '_' || unicode.IsLetter(ch) || unicode.IsDigit(ch) {
+       // we know the zero'th rune is OK; start scanning at the next one
+       ch := s.next()
+       for i := 1; s.isIdentRune(ch, i); i++ {
                 ch = s.next()
         }
         return ch
@@ -563,7 +576,7 @@ redo:
         // determine token value
         tok := ch
         switch {
-       case unicode.IsLetter(ch) || ch == '_':
+       case s.isIdentRune(ch, 0):
                 if s.Mode&ScanIdents != 0 {
                         tok = Ident
                         ch = s.scanIdentifier()
diff --git a/src/pkg/text/scanner/scanner_test.go b/src/pkg/text/scanner/scanner_test.go

index 7d3f597eb9ab074168bd2544d17c8f2f4a673869..702fac2b1adf20c1652df74f9a2684111be7d2e2 100644 (file)
--- a/src/pkg/text/scanner/scanner_test.go
+++ b/src/pkg/text/scanner/scanner_test.go
@@ -357,6 +357,28 @@ func TestScanSelectedMask(t *testing.T) {
         testScanSelectedMode(t, ScanComments, Comment)
  }
  
+func TestScanCustomIdent(t *testing.T) {
+       const src = "faab12345 a12b123 a12 3b"
+       s := new(Scanner).Init(strings.NewReader(src))
+       // ident = ( 'a' | 'b' ) { digit } .
+       // digit = '0' .. '3' .
+       // with a maximum length of 4
+       s.IsIdentRune = func(ch rune, i int) bool {
+               return i == 0 && (ch == 'a' || ch == 'b') || 0 < i && i < 4 && '0' <= ch && ch <= '3'
+       }
+       checkTok(t, s, 1, s.Scan(), 'f', "f")
+       checkTok(t, s, 1, s.Scan(), Ident, "a")
+       checkTok(t, s, 1, s.Scan(), Ident, "a")
+       checkTok(t, s, 1, s.Scan(), Ident, "b123")
+       checkTok(t, s, 1, s.Scan(), Int, "45")
+       checkTok(t, s, 1, s.Scan(), Ident, "a12")
+       checkTok(t, s, 1, s.Scan(), Ident, "b123")
+       checkTok(t, s, 1, s.Scan(), Ident, "a12")
+       checkTok(t, s, 1, s.Scan(), Int, "3")
+       checkTok(t, s, 1, s.Scan(), Ident, "b")
+       checkTok(t, s, 1, s.Scan(), EOF, "")
+}
+
  func TestScanNext(t *testing.T) {
         const BOM = '\uFEFF'
         BOMs := string(BOM)
author	Robert Griesemer <gri@golang.org>
	Mon, 16 Jun 2014 23:32:47 +0000 (16:32 -0700)
committer	Robert Griesemer <gri@golang.org>
	Mon, 16 Jun 2014 23:32:47 +0000 (16:32 -0700)
src/pkg/text/scanner/scanner.go		patch | blob | history
src/pkg/text/scanner/scanner_test.go		patch | blob | history