// By default, a Scanner skips white space and Go comments and recognizes all
// literals as defined by the Go language specification. It may be
// customized to recognize only a subset of those literals and to recognize
-// different white space characters.
+// different identifier and white space characters.
//
// Basic usage pattern:
//
"unicode/utf8"
)
-// TODO(gri): Consider changing this to use the new (token) Position package.
-
// A source position is represented by a Position value.
// A position is valid if Line > 0.
type Position struct {
// for values ch > ' '). The field may be changed at any time.
Whitespace uint64
+ // IsIdentRune is a predicate controlling the characters accepted
+ // as the ith rune in an identifier. The set of valid characters
+ // must not intersect with the set of white space characters.
+ // If no IsIdentRune function is set, regular Go identifiers are
+ // accepted instead. The field may be changed at any time.
+ IsIdentRune func(ch rune, i int) bool
+
// Start position of most recently scanned token; set by Scan.
// Calling Init or Next invalidates the position (Line == 0).
// The Filename field is always left untouched by the Scanner.
fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
}
+// isIdentRune reports whether ch is acceptable as the i'th rune (counting
+// from zero) of an identifier. If the IsIdentRune field is set, that
+// predicate decides. Otherwise '_' and Unicode letters are accepted at any
+// index, and Unicode digits are accepted at any index except the first.
+func (s *Scanner) isIdentRune(ch rune, i int) bool {
+	if accept := s.IsIdentRune; accept != nil {
+		return accept(ch, i)
+	}
+	switch {
+	case ch == '_', unicode.IsLetter(ch):
+		return true
+	case i > 0:
+		// digits may continue an identifier but never start one
+		return unicode.IsDigit(ch)
+	}
+	return false
+}
+
func (s *Scanner) scanIdentifier() rune {
- ch := s.next() // read character after first '_' or letter
- for ch == '_' || unicode.IsLetter(ch) || unicode.IsDigit(ch) {
+ // we know the zero'th rune is OK; start scanning at the next one
+ ch := s.next()
+ for i := 1; s.isIdentRune(ch, i); i++ {
ch = s.next()
}
return ch
// determine token value
tok := ch
switch {
- case unicode.IsLetter(ch) || ch == '_':
+ case s.isIdentRune(ch, 0):
if s.Mode&ScanIdents != 0 {
tok = Ident
ch = s.scanIdentifier()
testScanSelectedMode(t, ScanComments, Comment)
}
+// TestScanCustomIdent scans a fixed input with a custom IsIdentRune
+// predicate installed and checks each resulting token and its text.
+func TestScanCustomIdent(t *testing.T) {
+	const src = "faab12345 a12b123 a12 3b"
+	s := new(Scanner).Init(strings.NewReader(src))
+	// Grammar accepted by the predicate below:
+	//   ident = ( 'a' | 'b' ) { digit } .
+	//   digit = '0' .. '3' .
+	// with a maximum length of 4
+	s.IsIdentRune = func(ch rune, i int) bool {
+		switch {
+		case i == 0:
+			return ch == 'a' || ch == 'b'
+		case 0 < i && i < 4:
+			return '0' <= ch && ch <= '3'
+		}
+		return false
+	}
+	// Expected tokens, in scan order; every token is on line 1.
+	for _, want := range []struct {
+		tok  rune
+		text string
+	}{
+		{'f', "f"},
+		{Ident, "a"},
+		{Ident, "a"},
+		{Ident, "b123"},
+		{Int, "45"},
+		{Ident, "a12"},
+		{Ident, "b123"},
+		{Ident, "a12"},
+		{Int, "3"},
+		{Ident, "b"},
+		{EOF, ""},
+	} {
+		checkTok(t, s, 1, s.Scan(), want.tok, want.text)
+	}
+}
+
func TestScanNext(t *testing.T) {
const BOM = '\uFEFF'
BOMs := string(BOM)