// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-package Scanner
+package scanner
import (
"utf8";
"unicode";
- "utils";
+ "strconv";
)
const (
ILLEGAL = iota;
-
- IDENT;
+ EOF;
+
INT;
FLOAT;
STRING;
- EOF;
-
+ IDENT;
COMMENT;
ADD;
DEC;
EQL;
- NEQ;
LSS;
- LEQ;
GTR;
- GEQ;
-
ASSIGN;
- DEFINE;
NOT;
+
+ NEQ;
+ LEQ;
+ GEQ;
+ DEFINE;
ELLIPSIS;
LPAREN;
- RPAREN;
LBRACK;
- RBRACK;
LBRACE;
- RBRACE;
-
COMMA;
+ PERIOD;
+
+ RPAREN;
+ RBRACK;
+ RBRACE;
SEMICOLON;
COLON;
- PERIOD;
// keywords
keywords_beg;
// TokenString returns a printable form of the token code tok: the token's
// name for the literal classes (e.g. "INT", "IDENT"), the source spelling for
// operators, delimiters and keywords (e.g. "==", "break"), and "token(N)" with
// N in decimal for any value that has no case below.
func TokenString(tok int) string {
switch tok {
case ILLEGAL: return "ILLEGAL";
+ case EOF: return "EOF";
- case IDENT: return "IDENT";
case INT: return "INT";
case FLOAT: return "FLOAT";
case STRING: return "STRING";
- case EOF: return "EOF";
-
+ case IDENT: return "IDENT";
case COMMENT: return "COMMENT";
case ADD: return "+";
case DEC: return "--";
case EQL: return "==";
- case NEQ: return "!=";
case LSS: return "<";
- case LEQ: return "<=";
case GTR: return ">";
- case GEQ: return ">=";
-
case ASSIGN: return "=";
- case DEFINE: return ":=";
case NOT: return "!";
+
+ case NEQ: return "!=";
+ case LEQ: return "<=";
+ case GEQ: return ">=";
+ case DEFINE: return ":=";
case ELLIPSIS: return "...";
case LPAREN: return "(";
- case RPAREN: return ")";
case LBRACK: return "[";
- case RBRACK: return "]";
case LBRACE: return "{";
- case RBRACE: return "}";
-
case COMMA: return ",";
+ case PERIOD: return ".";
+
+ case RPAREN: return ")";
+ case RBRACK: return "]";
+ case RBRACE: return "}";
case SEMICOLON: return ";";
case COLON: return ":";
- case PERIOD: return ".";
case BREAK: return "break";
case CASE: return "case";
case VAR: return "var";
}
// no case matched: fall back to the numeric token value
- return "token(" + Utils.IntToString(tok, 10) + ")";
+ return "token(" + strconv.Itoa(tok) + ")";
}
func digit_val(ch int) int {
+ // TODO: spec permits other Unicode digits as well
if '0' <= ch && ch <= '9' {
return ch - '0';
}
// ErrorHandler is the interface through which the scanner reports problems.
// The scanner calls Error with the source position and a message for every
// error it detects.
type ErrorHandler interface {
Error(pos int, msg string);
- Warning(pos int, msg string);
}
// Scanner holds the state needed to scan one source buffer. The setup fields
// are assigned by Init; the scanning fields track the read position and the
// one-character look-ahead.
type Scanner struct {
// setup
+ src []byte; // source
err ErrorHandler; // receives every error the scanner reports
- src string; // source
scan_comments bool; // copied from Init's scan_comments argument
// scanning
pos int; // current reading position
ch int; // one char look-ahead
chpos int; // position of ch
- linepos int; // position of beginning of line
-
- // testmode
- testmode bool;
- testpos int;
}
r, w := int(S.src[S.pos]), 1;
if r >= 0x80 {
// not ascii
- r, w = utf8.DecodeRuneInString(S.src, S.pos);
+ r, w = utf8.DecodeRune(S.src[S.pos : len(S.src)]);
}
S.ch = r;
S.chpos = S.pos;
}
-func (S *Scanner) Error(pos int, msg string) {
- // check for expected errors (test mode)
- if S.testpos < 0 || pos == S.testpos {
- // test mode:
- // S.testpos < 0: // follow-up errors are expected and ignored
- // S.testpos == 0: // an error is expected at S.testpos and ignored
- S.testpos = -1;
- return;
- }
-
// error reports msg for source position pos by passing both, unchanged, to
// the ErrorHandler stored in S.err.
+func (S *Scanner) error(pos int, msg string) {
S.err.Error(pos, msg);
}
-func (S *Scanner) expectNoErrors() {
- // set the next expected error position to one after eof
- // (the eof position is a legal error position!)
- S.testpos = len(S.src) + 1;
-}
-
-
-func (S *Scanner) Init(err ErrorHandler, src string, scan_comments, testmode bool) {
- S.err = err;
// Init prepares S for scanning src. It records the source bytes, the error
// handler err and the scan_comments flag, then calls next once so that the
// look-ahead character is available.
// NOTE(review): the new version no longer assigns S.pos, so Init presumably
// has to be called on a Scanner whose pos is still zero - confirm.
+func (S *Scanner) Init(src []byte, err ErrorHandler, scan_comments bool) {
S.src = src;
+ S.err = err;
S.scan_comments = scan_comments;
-
- S.pos = 0;
- S.linepos = 0;
-
- S.testmode = testmode;
- S.expectNoErrors(); // S.src must be set
- S.next(); // S.expectNoErrrors() must be called before
+ S.next(); // load the first look-ahead character
}
case '\\': s = `\\`;
case '\'': s = `\'`;
}
- return "'" + s + "' (U+" + Utils.IntToString(ch, 16) + ")";
+ return "'" + s + "' (U+" + strconv.Itob(ch, 16) + ")";
}
// expect checks that the look-ahead character is ch and reports an
// "expected ..., found ..." error at the look-ahead position if it is not.
// In both cases one character is consumed.
func (S *Scanner) expect(ch int) {
if S.ch != ch {
- S.Error(S.chpos, "expected " + charString(ch) + ", found " + charString(S.ch));
+ S.error(S.chpos, "expected " + charString(ch) + ", found " + charString(S.ch));
}
S.next(); // make always progress
}
}
-func (S *Scanner) scanComment() string {
+func (S *Scanner) scanComment() []byte {
// first '/' already consumed
pos := S.chpos - 1;
}
}
- S.Error(pos, "comment not terminated");
+ S.error(pos, "comment not terminated");
exit:
- comment := S.src[pos : S.chpos];
-
- if S.testmode {
- // interpret ERROR and SYNC comments
- oldpos := -1;
- switch {
- case len(comment) >= 8 && comment[3 : 8] == "ERROR" :
- // an error is expected at the next token position
- oldpos = S.testpos;
- S.skipWhitespace();
- S.testpos = S.chpos;
- case len(comment) >= 7 && comment[3 : 7] == "SYNC" :
- // scanning/parsing synchronized again - no (follow-up) errors expected
- oldpos = S.testpos;
- S.expectNoErrors();
- }
-
- if 0 <= oldpos && oldpos <= len(S.src) {
- // the previous error was not found
- S.Error(oldpos, "ERROR not found"); // TODO this should call ErrorMsg
- }
- }
-
- return comment;
+ return S.src[pos : S.chpos];
}
-func (S *Scanner) scanIdentifier() (tok int, val string) {
+func (S *Scanner) scanIdentifier() (tok int, val []byte) {
pos := S.chpos;
for is_letter(S.ch) || digit_val(S.ch) < 10 {
S.next();
val = S.src[pos : S.chpos];
var present bool;
- tok, present = keywords[val];
+ tok, present = keywords[string(val)];
if !present {
tok = IDENT;
}
}
-func (S *Scanner) scanNumber(seen_decimal_point bool) (tok int, val string) {
+func (S *Scanner) scanNumber(seen_decimal_point bool) (tok int, val []byte) {
pos := S.chpos;
tok = INT;
n--;
}
if n > 0 {
- S.Error(S.chpos, "illegal char escape");
+ S.error(S.chpos, "illegal char escape");
}
}
-func (S *Scanner) scanEscape(quote int) string {
- // TODO: fix this routine
-
// scanEscape checks and consumes one escape sequence. On entry S.ch is the
// character that selects the escape form (NOTE(review): presumably the caller
// has already consumed the backslash - confirm). Accepted forms are the single
// characters a b f n r t v, a backslash and quote; an octal digit followed by
// two more digits requested from scanDigits; x with 2, u with 4 and U with 8
// hexadecimal digits. Any other character is reported as "illegal char escape"
// at that character's position. The new version returns nothing: the escape
// is only validated, not decoded.
+func (S *Scanner) scanEscape(quote int) {
ch := S.ch;
pos := S.chpos;
S.next();
switch ch {
- case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\':
- return string(ch);
-
+ case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
+ // nothing to do
case '0', '1', '2', '3', '4', '5', '6', '7':
- S.scanDigits(3 - 1, 8); // 1 char already read
- return ""; // TODO fix this
-
+ S.scanDigits(3 - 1, 8); // 1 char read already
case 'x':
S.scanDigits(2, 16);
- return ""; // TODO fix this
-
case 'u':
S.scanDigits(4, 16);
- return ""; // TODO fix this
-
case 'U':
S.scanDigits(8, 16);
- return ""; // TODO fix this
-
default:
- // check for quote outside the switch for better generated code (eventually)
- if ch == quote {
- return string(quote);
- }
- S.Error(pos, "illegal char escape");
+ S.error(pos, "illegal char escape");
}
-
- return ""; // TODO fix this
}
-func (S *Scanner) scanChar() string {
+func (S *Scanner) scanChar() []byte {
// '\'' already consumed
pos := S.chpos - 1;
}
-func (S *Scanner) scanString() string {
+func (S *Scanner) scanString() []byte {
// '"' already consumed
pos := S.chpos - 1;
ch := S.ch;
S.next();
if ch == '\n' || ch < 0 {
- S.Error(pos, "string not terminated");
+ S.error(pos, "string not terminated");
break;
}
if ch == '\\' {
}
-func (S *Scanner) scanRawString() string {
+func (S *Scanner) scanRawString() []byte {
// '`' already consumed
pos := S.chpos - 1;
ch := S.ch;
S.next();
if ch == '\n' || ch < 0 {
- S.Error(pos, "string not terminated");
+ S.error(pos, "string not terminated");
break;
}
}
}
-func (S *Scanner) Scan() (pos, tok int, val string) {
// Scan skips whitespace and classifies the next token. It returns the token's
// position, its token code and, for tokens that carry text (e.g. STRING, INT,
// COMMENT), that text as a byte slice. A newline is returned as a COMMENT
// token whose value is the single byte '\n'; an unrecognized character is
// reported through S.error and returned as ILLEGAL.
+func (S *Scanner) Scan() (pos, tok int, val []byte) {
loop:
S.skipWhitespace();
S.next(); // always make progress
switch ch {
case -1: tok = EOF;
- case '\n': tok, val = COMMENT, "\n";
+ case '\n': tok, val = COMMENT, []byte{'\n'}; // composite literal: a character constant cannot be converted to []byte
case '"': tok, val = STRING, S.scanString();
case '\'': tok, val = INT, S.scanChar();
case '`': tok, val = STRING, S.scanRawString();
case '&': tok = S.select3(AND, AND_ASSIGN, '&', LAND);
case '|': tok = S.select3(OR, OR_ASSIGN, '|', LOR);
default:
- S.Error(pos, "illegal character " + charString(ch));
+ S.error(pos, "illegal character " + charString(ch));
tok = ILLEGAL;
}
}