COR = iota;
// keywords
+ KEYWORDS_BEG = iota;
BREAK = iota;
CASE = iota;
CONST = iota;
TRUE = iota;
TYPE = iota;
VAR = iota;
+ KEYWORDS_END = iota;
)
export TokenName
func TokenName(tok int) string {
switch (tok) {
- case ILLEGAL: return "ILLEGAL";
- case EOF: return "EOF";
- case IDENT: return "IDENT";
- case STRING: return "STRING";
- case NUMBER: return "NUMBER";
-
- case COMMA: return "COMMA";
- case COLON: return "COLON";
- case SEMICOLON: return "SEMICOLON";
- case PERIOD: return "PERIOD";
-
- case LPAREN: return "LPAREN";
- case RPAREN: return "RPAREN";
- case LBRACK: return "LBRACK";
- case RBRACK: return "RBRACK";
- case LBRACE: return "LBRACE";
- case RBRACE: return "RBRACE";
-
- case ASSIGN: return "ASSIGN";
- case DEFINE: return "DEFINE";
+ case ILLEGAL: return "illegal";
+ case EOF: return "eof";
+ case IDENT: return "ident";
+ case STRING: return "string";
+ case NUMBER: return "number";
+
+ case COMMA: return ",";
+ case COLON: return ":";
+ case SEMICOLON: return ";";
+ case PERIOD: return ".";
+
+ case LPAREN: return "(";
+ case RPAREN: return ")";
+ case LBRACK: return "[";
+ case RBRACK: return "]";
+ case LBRACE: return "{";
+ case RBRACE: return "}";
+
+ case ASSIGN: return "=";
+ case DEFINE: return ":=";
- case INC: return "INC";
- case DEC: return "DEC";
- case NOT: return "NOT";
+ case INC: return "++";
+ case DEC: return "--";
+ case NOT: return "!";
- case AND: return "AND";
- case OR: return "OR";
- case XOR: return "XOR";
+ case AND: return "&";
+ case OR: return "|";
+ case XOR: return "^";
- case ADD: return "ADD";
- case SUB: return "SUB";
- case MUL: return "MUL";
- case REM: return "REM";
- case QUO: return "QUO";
- case REM: return "REM";
+ case ADD: return "+";
+ case SUB: return "-";
+ case MUL: return "*";
+ case QUO: return "/";
+ case REM: return "%";
- case EQL: return "EQL";
- case NEQ: return "NEQ";
- case LSS: return "LSS";
- case LEQ: return "LEQ";
- case GTR: return "GTR";
- case GEQ: return "GEQ";
-
- case SHL: return SHL;
- case SHR: return SHR;
-
- case ADD_ASSIGN: return "ADD_ASSIGN";
- case SUB_ASSIGN: return "SUB_ASSIGN";
- case MUL_ASSIGN: return "MUL_ASSIGN";
- case QUO_ASSIGN: return "QUO_ASSIGN";
- case REM_ASSIGN: return "REM_ASSIGN";
-
- case AND_ASSIGN: return "AND_ASSIGN";
- case OR_ASSIGN: return "OR_ASSIGN";
- case XOR_ASSIGN: return "XOR_ASSIGN";
-
- case SHL_ASSIGN: return "SHL_ASSIGN";
- case SHR_ASSIGN: return "SHR_ASSIGN";
-
- case CAND: return "CAND";
- case COR: return "COR";
-
- case BREAK: return "BREAK";
- case CASE: return "CASE";
- case CONST: return "CONST";
- case CONTINUE: return "CONTINUE";
- case DEFAULT: return "DEFAULT";
- case ELSE: return "ELSE";
- case EXPORT: return "EXPORT";
- case FALLTHROUGH: return "FALLTHROUGH";
- case FALSE: return "FALSE";
- case FOR: return "FOR";
- case FUNC: return "FUNC";
- case GO: return "GO";
- case GOTO: return "GOTO";
- case IF: return "IF";
- case IMPORT: return "IMPORT";
- case INTERFACE: return "INTERFACE";
- case MAP: return "MAP";
- case NEW: return "NEW";
- case NIL: return "NIL";
- case PACKAGE: return "PACKAGE";
- case RANGE: return "RANGE";
- case RETURN: return "RETURN";
- case SELECT: return "SELECT";
- case STRUCT: return "STRUCT";
- case SWITCH: return "SWITCH";
- case TRUE: return "TRUE";
- case TYPE: return "TYPE";
- case VAR: return "VAR";
+ case EQL: return "==";
+ case NEQ: return "!=";
+ case LSS: return "<";
+ case LEQ: return "<=";
+ case GTR: return ">";
+ case GEQ: return ">=";
+
+ case SHL: return "<<";
+ case SHR: return ">>";
+
+ case ADD_ASSIGN: return "+=";
+ case SUB_ASSIGN: return "-=";
+ case MUL_ASSIGN: return "+=";
+ case QUO_ASSIGN: return "/=";
+ case REM_ASSIGN: return "%=";
+
+ case AND_ASSIGN: return "&=";
+ case OR_ASSIGN: return "|=";
+ case XOR_ASSIGN: return "^=";
+
+ case SHL_ASSIGN: return "<<=";
+ case SHR_ASSIGN: return ">>=";
+
+ case CAND: return "&&";
+ case COR: return "||";
+
+ case BREAK: return "break";
+ case CASE: return "case";
+ case CONST: return "const";
+ case CONTINUE: return "continue";
+ case DEFAULT: return "default";
+ case ELSE: return "else";
+ case EXPORT: return "export";
+ case FALLTHROUGH: return "fallthrough";
+ case FALSE: return "false";
+ case FOR: return "for";
+ case FUNC: return "func";
+ case GO: return "go";
+ case GOTO: return "goto";
+ case IF: return "if";
+ case IMPORT: return "import";
+ case INTERFACE: return "interface";
+ case MAP: return "map";
+ case NEW: return "new";
+ case NIL: return "nil";
+ case PACKAGE: return "package";
+ case RANGE: return "range";
+ case RETURN: return "return";
+ case SELECT: return "select";
+ case STRUCT: return "struct";
+ case SWITCH: return "switch";
+ case TRUE: return "true";
+ case TYPE: return "type";
+ case VAR: return "var";
}
return "???";
func (S *Scanner) Next () {
+ const (
+ Bit1 = 7;
+ Bitx = 6;
+ Bit2 = 5;
+ Bit3 = 4;
+ Bit4 = 3;
+
+ T1 = 0x00; // (1 << (Bit1 + 1) - 1) ^ 0xFF; // 0000 0000
+ Tx = 0x80; // (1 << (Bitx + 1) - 1) ^ 0xFF; // 1000 0000
+ T2 = 0xC0; // (1 << (Bit2 + 1) - 1) ^ 0xFF; // 1100 0000
+ T3 = 0xE0; // (1 << (Bit3 + 1) - 1) ^ 0xFF; // 1110 0000
+ T4 = 0xF0; // (1 << (Bit4 + 1) - 1) ^ 0xFF; // 1111 0000
+
+ Rune1 = 1 << (Bit1 + 0*Bitx) - 1; // 0000 0000 0111 1111
+ Rune2 = 1 << (Bit2 + 1*Bitx) - 1; // 0000 0111 1111 1111
+ Rune3 = 1 << (Bit3 + 2*Bitx) - 1; // 1111 1111 1111 1111
+
+ Maskx = 0x3F; // 1 << Bitx - 1; // 0011 1111
+ Testx = 0xC0; // Maskx ^ 0xFF; // 1100 0000
+
+ Bad = 0xFFFD; // Runeerror
+ );
+
src := S.src; // TODO only needed because of 6g bug
- if S.pos < len(src) {
- S.ch = int(S.src[S.pos]);
- S.pos++;
- if (S.ch >= 128) {
- panic "UTF-8 not handled"
+ lim := len(src);
+ pos := S.pos;
+
+ // 1-byte sequence
+ // 0000-007F => T1
+ if pos >= lim {
+ goto eof;
+ }
+ c0 := int(src[pos + 0]);
+ if c0 < Tx {
+ S.ch = c0;
+ S.pos = pos + 1;
+ return;
+ }
+
+ // 2-byte sequence
+ // 0080-07FF => T2 Tx
+ if pos + 1 >= lim {
+ goto eof;
+ }
+ c1 := int(src[pos + 1]) ^ Tx;
+ if c1 & Testx != 0 {
+ goto bad;
+ }
+ if c0 < T3 {
+ if c0 < T2 {
+ goto bad;
}
- } else {
- S.ch = -1;
+ r := (c0 << Bitx | c1) & Rune2;
+ if r <= Rune1 {
+ goto bad;
+ }
+ S.ch = r;
+ S.pos = pos + 2;
+ return;
+ }
+
+ // 3-byte encoding
+ // 0800-FFFF => T3 Tx Tx
+ if pos + 2 >= lim {
+ goto eof;
}
+ c2 := int(src[pos + 2]) ^ Tx;
+ if c2 & Testx != 0 {
+ goto bad;
+ }
+ if c0 < T4 {
+ r := (((c0 << Bitx | c1) << Bitx) | c2) & Rune3;
+ if r <= Rune2 {
+ goto bad;
+ }
+ S.ch = r;
+ S.pos = pos + 3;
+ return;
+ }
+
+ // bad encoding
+bad:
+ S.ch = Bad;
+ S.pos += 1;
+ return;
+
+ // end of file
+eof:
+ S.ch = -1;
}
func Init () {
Keywords = new(map [string] int);
-
- Keywords["break"] = BREAK;
- Keywords["case"] = CASE;
- Keywords["const"] = CONST;
- Keywords["continue"] = CONTINUE;
- Keywords["default"] = DEFAULT;
- Keywords["else"] = ELSE;
- Keywords["export"] = EXPORT;
- Keywords["fallthrough"] = FALLTHROUGH;
- Keywords["false"] = FALSE;
- Keywords["for"] = FOR;
- Keywords["func"] = FUNC;
- Keywords["go"] = GO;
- Keywords["goto"] = GOTO;
- Keywords["if"] = IF;
- Keywords["import"] = IMPORT;
- Keywords["interface"] = INTERFACE;
- Keywords["map"] = MAP;
- Keywords["new"] = NEW;
- Keywords["nil"] = NIL;
- Keywords["package"] = PACKAGE;
- Keywords["range"] = RANGE;
- Keywords["return"] = RETURN;
- Keywords["select"] = SELECT;
- Keywords["struct"] = STRUCT;
- Keywords["switch"] = SWITCH;
- Keywords["true"] = TRUE;
- Keywords["type"] = TYPE;
- Keywords["var"] = VAR;
+
+ for i := KEYWORDS_BEG; i <= KEYWORDS_END; i++ {
+ Keywords[TokenName(i)] = i;
+ }
}
func (S *Scanner) ScanMantissa () {
- for is_dec_digit(S.ch) {
- S.Next();
+ for is_dec_digit(S.ch) {
+ S.Next();
}
}
func (S *Scanner) ScanNumber () int {
// TODO complete this routine
if S.ch == '.' {
- S.Next();
+ S.Next();
}
S.ScanMantissa();
if S.ch == 'e' || S.ch == 'E' {
- S.Next();
+ S.Next();
if S.ch == '-' || S.ch == '+' {
- S.Next();
+ S.Next();
}
S.ScanMantissa();
}
func (S *Scanner) Select2 (tok0, tok1 int) int {
- if S.ch == '=' {
- S.Next();
- return tok1;
+ if S.ch == '=' {
+ S.Next();
+ return tok1;
}
return tok0;
}
func (S *Scanner) Select3 (tok0, tok1, ch2, tok2 int) int {
if S.ch == '=' {
- S.Next();
+ S.Next();
return tok1;
}
if S.ch == ch2 {
- S.Next();
+ S.Next();
return tok2;
}
return tok0;
func (S *Scanner) Select4 (tok0, tok1, ch2, tok2, tok3 int) int {
if S.ch == '=' {
- S.Next();
+ S.Next();
return tok1;
}
if S.ch == ch2 {
- S.Next();
+ S.Next();
if S.ch == '=' {
- S.Next();
+ S.Next();
return tok3;
}
return tok2;
default:
S.Next();
switch ch {
- case -1: tok = EOF;
- case '"': tok = S.ScanString();
- case '\'': tok = S.ScanChar();
- case '`': tok = S.ScanRawString();
- case ':': tok = S.Select2(COLON, DEFINE);
- case '.':
- if is_dec_digit(S.ch) {
- tok = S.ScanNumber();
- } else {
- tok = PERIOD;
- }
- case ',': tok = COMMA;
- case ';': tok = SEMICOLON;
- case '(': tok = LPAREN;
- case ')': tok = RPAREN;
- case '[': tok = LBRACK;
- case ']': tok = RBRACK;
- case '{': tok = LBRACE;
- case '}': tok = RBRACE;
- case '+': tok = S.Select3(ADD, ADD_ASSIGN, '+', INC);
- case '-': tok = S.Select3(SUB, SUB_ASSIGN, '-', DEC);
- case '*': tok = S.Select2(MUL, MUL_ASSIGN);
- case '/':
- if S.ch == '/' || S.ch == '*' {
- S.SkipComment();
- // cannot simply return because of 6g bug
- tok, beg, end = S.Scan();
- return tok, beg, end;
- }
- tok = S.Select2(QUO, QUO_ASSIGN);
- case '%': tok = S.Select2(REM, REM_ASSIGN);
- case '^': tok = S.Select2(XOR, XOR_ASSIGN);
- case '<': tok = S.Select4(LSS, LEQ, '<', SHL, SHL_ASSIGN);
- case '>': tok = S.Select4(GTR, GEQ, '>', SHR, SHR_ASSIGN);
- case '=': tok = S.Select2(ASSIGN, EQL);
- case '!': tok = S.Select2(NOT, NEQ);
- case '&': tok = S.Select3(AND, AND_ASSIGN, '&', CAND);
- case '|': tok = S.Select3(OR, OR_ASSIGN, '|', COR);
- default: tok = ILLEGAL;
-
+ case -1: tok = EOF;
+ case '"': tok = S.ScanString();
+ case '\'': tok = S.ScanChar();
+ case '`': tok = S.ScanRawString();
+ case ':': tok = S.Select2(COLON, DEFINE);
+ case '.':
+ if is_dec_digit(S.ch) {
+ tok = S.ScanNumber();
+ } else {
+ tok = PERIOD;
+ }
+ case ',': tok = COMMA;
+ case ';': tok = SEMICOLON;
+ case '(': tok = LPAREN;
+ case ')': tok = RPAREN;
+ case '[': tok = LBRACK;
+ case ']': tok = RBRACK;
+ case '{': tok = LBRACE;
+ case '}': tok = RBRACE;
+ case '+': tok = S.Select3(ADD, ADD_ASSIGN, '+', INC);
+ case '-': tok = S.Select3(SUB, SUB_ASSIGN, '-', DEC);
+ case '*': tok = S.Select2(MUL, MUL_ASSIGN);
+ case '/':
+ if S.ch == '/' || S.ch == '*' {
+ S.SkipComment();
+ // cannot simply return because of 6g bug
+ tok, beg, end = S.Scan();
+ return tok, beg, end;
+ }
+ tok = S.Select2(QUO, QUO_ASSIGN);
+ case '%': tok = S.Select2(REM, REM_ASSIGN);
+ case '^': tok = S.Select2(XOR, XOR_ASSIGN);
+ case '<': tok = S.Select4(LSS, LEQ, '<', SHL, SHL_ASSIGN);
+ case '>': tok = S.Select4(GTR, GEQ, '>', SHR, SHR_ASSIGN);
+ case '=': tok = S.Select2(ASSIGN, EQL);
+ case '!': tok = S.Select2(NOT, NEQ);
+ case '&': tok = S.Select3(AND, AND_ASSIGN, '&', CAND);
+ case '|': tok = S.Select3(OR, OR_ASSIGN, '|', COR);
+ default: tok = ILLEGAL;
}
}