index int // used only in debugging; could be eliminated
next *instr // the instruction to execute after this one
// Special fields valid only for some items.
- char int // iChar
+ char rune // iChar
braNum int // iBra, iEbra
cclass *charClass // iCharClass
left *instr // iAlt, other branch
// charClass is the payload of an iCharClass instruction: a possibly negated
// set of inclusive character ranges.
type charClass struct {
negate bool // is character class negated? ([^a-z])
// slice of rune (formerly int), stored pairwise: [a-z] is (a,z); x is (x,x):
- ranges []int
- cmin, cmax int
+ ranges []rune
+ cmin, cmax rune
// matches returns negate immediately for any character outside [cmin, cmax].
// NOTE(review): presumably cmin/cmax track the smallest and largest values
// passed to addRange; confirm against the full addRange body.
}
func (cclass *charClass) print() {
}
}
-func (cclass *charClass) addRange(a, b int) {
+func (cclass *charClass) addRange(a, b rune) {
// range is a through b inclusive
cclass.ranges = append(cclass.ranges, a, b)
if a < cclass.cmin {
}
}
-func (cclass *charClass) matches(c int) bool {
+func (cclass *charClass) matches(c rune) bool {
if c < cclass.cmin || c > cclass.cmax {
return cclass.negate
}
// newCharClass returns a new instruction of kind iCharClass whose charClass
// has an empty ranges slice (length 0, capacity 4).
func newCharClass() *instr {
i := &instr{kind: iCharClass}
i.cclass = new(charClass)
- i.cclass.ranges = make([]int, 0, 4)
+ i.cclass.ranges = make([]rune, 0, 4)
// Start with inverted bounds: cmin is one past the largest rune and cmax is
// below zero, so no character lies within [cmin, cmax] yet.
i.cclass.cmin = 0x10FFFF + 1 // MaxRune + 1
i.cclass.cmax = -1
return i
re *Regexp
nlpar int // number of unclosed lpars
pos int
- ch int
+ ch rune
}
func (p *parser) error(err Error) {
const endOfText = -1
// c returns the parser's current character (p.ch) without advancing.
-func (p *parser) c() int { return p.ch }
+func (p *parser) c() rune { return p.ch }
-func (p *parser) nextc() int {
+func (p *parser) nextc() rune {
if p.pos >= len(p.re.expr) {
p.ch = endOfText
} else {
return p
}
// special reports whether c is one of the characters in `\.+*?()|[]^$`.
-func special(c int) bool {
+func special(c rune) bool {
// Linear scan over the fixed set of characters.
for _, r := range `\.+*?()|[]^$` {
if c == r {
return true
return false
}
-func ispunct(c int) bool {
+func ispunct(c rune) bool {
for _, r := range "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" {
if c == r {
return true
var escapes = []byte("abfnrtv")
var escaped = []byte("\a\b\f\n\r\t\v")
// escape returns the index of c within escapes ("abfnrtv"), or -1 if c is
// not one of those letters. The result is an index, not a character: callers
// use it to select the matching control character from escaped.
-func escape(c int) int {
+func escape(c rune) int {
for i, b := range escapes {
// escapes holds single bytes, so widen each before comparing with c.
- if int(b) == c {
+ if rune(b) == c {
return i
}
}
return -1
}
-func (p *parser) checkBackslash() int {
+func (p *parser) checkBackslash() rune {
c := p.c()
if c == '\\' {
c = p.nextc()
case ispunct(c):
// c is as delivered
case escape(c) >= 0:
- c = int(escaped[escape(c)])
+ c = rune(escaped[escape(c)])
default:
p.error(ErrBadBackslash)
}
cc.negate = true
p.nextc()
}
- left := -1
+ left := rune(-1)
for {
switch c := p.c(); c {
case ']', endOfText:
// input abstracts different representations of the input text. It provides
// one-character lookahead.
type input interface {
// step yields the character at pos together with its width. The string and
// byte-slice implementations decode it with the utf8 package; the reader
// implementation can return endOfText with width 0.
- step(pos int) (rune int, width int) // advance one rune
- canCheckPrefix() bool // can we look ahead without losing info?
+ step(pos int) (r rune, width int) // advance one rune
+ canCheckPrefix() bool // can we look ahead without losing info?
hasPrefix(re *Regexp) bool
index(re *Regexp, pos int) int
}
return &inputString{str: str}
}
-func (i *inputString) step(pos int) (int, int) {
+func (i *inputString) step(pos int) (rune, int) {
if pos < len(i.str) {
return utf8.DecodeRuneInString(i.str[pos:len(i.str)])
}
return &inputBytes{str: str}
}
-func (i *inputBytes) step(pos int) (int, int) {
+func (i *inputBytes) step(pos int) (rune, int) {
if pos < len(i.str) {
return utf8.DecodeRune(i.str[pos:len(i.str)])
}
return &inputReader{r: r}
}
-func (i *inputReader) step(pos int) (int, int) {
+func (i *inputReader) step(pos int) (rune, int) {
if !i.atEOT && pos != i.pos {
return endOfText, 0
atBOT: pos == 0,
atEOT: nextChar == endOfText,
}
- for c, startPos := 0, pos; c != endOfText; {
+ for c, startPos := rune(0), pos; c != endOfText; {
if !found && (pos == startPos || !anchored) {
// prime the pump if we haven't seen a match yet
match := arena.noMatch()
// of the regular expression re. It returns the boolean true if the
// literal string comprises the entire regular expression.
func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
- c := make([]int, len(re.inst)-2) // minus start and end.
+ c := make([]rune, len(re.inst)-2) // minus start and end.
// First instruction is start; skip that.
i := 0
for inst := re.inst[0].next; inst.kind != iEnd; inst = inst.next {
// A byte loop is correct because all metacharacters are ASCII.
j := 0
for i := 0; i < len(s); i++ {
- if special(int(s[i])) {
+ if special(rune(s[i])) {
b[j] = '\\'
j++
}
for i := 0; i < len(b); i++ {
c := b[i]
- if !jsIsSpecial(int(c)) {
+ if !jsIsSpecial(rune(c)) {
// fast path: nothing to do
continue
}
}
} else {
// Unicode rune.
- rune, size := utf8.DecodeRune(b[i:])
- if unicode.IsPrint(rune) {
+ r, size := utf8.DecodeRune(b[i:])
+ if unicode.IsPrint(r) {
w.Write(b[i : i+size])
} else {
// TODO(dsymonds): Do this without fmt?
- fmt.Fprintf(w, "\\u%04X", rune)
+ fmt.Fprintf(w, "\\u%04X", r)
}
i += size - 1
}
return b.String()
}
// jsIsSpecial reports whether r cannot be copied through unchanged by the
// JavaScript escaper: backslash, single or double quote, '<', '>', any
// character below space, or any value at or above utf8.RuneSelf (non-ASCII).
-func jsIsSpecial(rune int) bool {
- switch rune {
+func jsIsSpecial(r rune) bool {
+ switch r {
case '\\', '\'', '"', '<', '>':
return true
}
// Everything else is special only if it is a control character or non-ASCII.
- return rune < ' ' || utf8.RuneSelf <= rune
+ return r < ' ' || utf8.RuneSelf <= r
}
// JSEscaper returns the escaped JavaScript equivalent of the textual
}
// next returns the next rune in the input and advances l.pos past it,
// recording the rune's width in l.width. At or past the end of the input it
// sets l.width to 0 and returns eof without moving l.pos.
-func (l *lexer) next() (rune int) {
+func (l *lexer) next() (r rune) {
if l.pos >= len(l.input) {
l.width = 0
return eof
}
- rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
+ r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
l.pos += l.width
- return rune
+ return r
}
// peek returns but does not consume the next rune in the input.
// It is implemented as a call to next followed by backup.
-func (l *lexer) peek() int {
- rune := l.next()
+func (l *lexer) peek() rune {
+ r := l.next()
l.backup()
- return rune
+ return r
}
// backup steps back one rune. Can only be called once per call of next.
}
// isSpace reports whether r is a space character.
-func isSpace(r int) bool {
+func isSpace(r rune) bool {
switch r {
case ' ', '\t', '\n', '\r':
return true
}
// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
// Letters and digits are classified with unicode.IsLetter and
// unicode.IsDigit, so the test is not limited to ASCII.
-func isAlphaNumeric(r int) bool {
+func isAlphaNumeric(r rune) bool {
return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
}