}
-func is_oct_digit (ch int) bool {
- return '0' <= ch && ch <= '7';
+func digit_val (ch int) int { // numeric value of ch read as a digit: '0'-'9' => 0-9, 'a'-'f' and 'A'-'F' => 10-15
+ if '0' <= ch && ch <= '9' {
+ return ch - '0';
+ }
+ if 'a' <= ch && ch <= 'f' {
+ return ch - 'a' + 10;
+ }
+ if 'A' <= ch && ch <= 'F' {
+ return ch - 'A' + 10;
+ }
+ return 16; // not a digit: 16 is larger than any legal digit val, so a `digit_val(ch) < base` test fails for every base up to 16
}
-func is_dec_digit (ch int) bool {
- return '0' <= ch && ch <= '9';
+export Scanner
+type Scanner struct { // scanning state; its methods read tokens from src
+ src string; // source text; token text is sliced out of it
+ pos int; // NOTE(review): appears to be the index just past ch (token starts are computed as pos - 1) - confirm against Next()
+ ch int; // one char look-ahead
}
-func is_hex_digit (ch int) bool {
- return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F';
+export Token
+type Token struct { // one scanned token, as filled in by Scanner.Scan
+ val int; // token kind (the tok value Scan returns; printable via TokenName)
+ beg, end int; // source range of the token, printed as the half-open interval [beg, end[
+ txt string; // token text, S.src[beg : end]
}
-export Scanner
-type Scanner struct {
- src string;
- pos int;
- ch int; // one char look-ahead
+func (T *Token) Print () { // prints the token on one line: name, range, text
+ print TokenName(T.val), " [", T.beg, ", ", T.end, "[ ", T.txt, "\n"; // output shape: NAME [beg, end[ text
}
func (S *Scanner) ScanIdentifier () int {
beg := S.pos - 1;
- for is_letter(S.ch) || is_dec_digit(S.ch) {
+ for is_letter(S.ch) || digit_val(S.ch) < 10 {
S.Next();
}
end := S.pos - 1;
func (S *Scanner) ScanMantissa (base int) {
- for is_dec_digit(S.ch) {
+ for digit_val(S.ch) < base { // skip every consecutive char that is a valid digit in the given base
S.Next();
}
}
}
if S.ch == '0' {
+ // TODO bug: doesn't accept 09.0 !
// int
S.Next();
if S.ch == 'x' || S.ch == 'X' {
}
-func (S *Scanner) ScanOctDigits(n int) {
- for ; n > 0; n-- {
- if !is_oct_digit(S.ch) {
- panic "illegal char escape";
- }
+// ScanDigits consumes exactly n digits of the given base (8 or 16 for
+// char escapes), starting at the look-ahead char S.ch. It panics with
+// "illegal char escape" if fewer than n digits are present.
+func (S *Scanner) ScanDigits(n int, base int) {
+ // Stop after n digits: an escape has a fixed width, so a digit that
+ // follows it belongs to the surrounding text, not to the escape.
+ for n > 0 && digit_val(S.ch) < base {
S.Next();
+ n--;
}
-}
-
-
-func (S *Scanner) ScanHexDigits(n int) {
- for ; n > 0; n-- {
- if !is_hex_digit(S.ch) {
- panic "illegal char escape";
- }
- S.Next();
+ if n > 0 { // hit a non-digit before reading all n digits
+ panic "illegal char escape";
}
}
return string(ch);
case '0', '1', '2', '3', '4', '5', '6', '7':
- S.ScanOctDigits(3 - 1); // 1 char already read
+ S.ScanDigits(3 - 1, 8); // 1 char already read
return ""; // TODO fix this
case 'x':
- S.ScanHexDigits(2);
+ S.ScanDigits(2, 16);
return ""; // TODO fix this
case 'u':
- S.ScanHexDigits(4);
+ S.ScanDigits(4, 16);
return ""; // TODO fix this
case 'U':
- S.ScanHexDigits(8);
+ S.ScanDigits(8, 16);
return ""; // TODO fix this
default:
S.ScanEscape();
}
- S.Next();
+ S.Expect('\'');
return NUMBER;
}
}
-func (S *Scanner) Scan () (tok, beg, end int) {
+func (S *Scanner) Scan (t *Token) (tok, beg, end int) { // scans the next token; besides returning it, records it in *t
S.SkipWhitespace();
var tok int = ILLEGAL;
var beg int = S.pos - 1;
var end int = beg;
- switch ch := S.ch; {
+ ch := S.ch; // saved before S.Next() below so the inner switch still sees the token's first char
+ switch {
case is_letter(ch): tok = S.ScanIdentifier();
- case is_dec_digit(ch): tok = S.ScanNumber(false);
+ case digit_val(ch) < 10: tok = S.ScanNumber(false); // ch is a decimal digit
default:
S.Next();
switch ch {
case '`': tok = S.ScanRawString();
case ':': tok = S.Select2(COLON, DEFINE);
case '.':
- if is_dec_digit(S.ch) {
+ if digit_val(S.ch) < 10 { // '.' followed by a decimal digit: scan it as a number
tok = S.ScanNumber(true);
} else {
tok = PERIOD;
if S.ch == '/' || S.ch == '*' {
S.SkipComment();
// cannot simply return because of 6g bug
- tok, beg, end = S.Scan();
+ tok, beg, end = S.Scan(t); // comment skipped: rescan; the recursive call fills in *t
return tok, beg, end;
}
tok = S.Select2(QUO, QUO_ASSIGN);
}
end = S.pos - 1;
+
+ t.val = tok; // mirror the returned token into *t
+ t.beg = beg;
+ t.end = end;
+ t.txt = S.src[beg : end]; // token text cut out of the source
+
return tok, beg, end;
}