regexp: use rune

author Russ Cox <rsc@golang.org>

Wed, 26 Oct 2011 05:20:57 +0000 (22:20 -0700)

committer Russ Cox <rsc@golang.org>

Wed, 26 Oct 2011 05:20:57 +0000 (22:20 -0700)
author Russ Cox <rsc@golang.org>
Wed, 26 Oct 2011 05:20:57 +0000 (22:20 -0700)
committer Russ Cox <rsc@golang.org>
Wed, 26 Oct 2011 05:20:57 +0000 (22:20 -0700)
diff --git a/src/pkg/regexp/exec.go b/src/pkg/regexp/exec.go

index 3b0e3888524473cb702b231fedf7782e279f133f..d7057a191b773719a178aef5eaba2001f9a6ab01 100644 (file)
--- a/src/pkg/regexp/exec.go
+++ b/src/pkg/regexp/exec.go
@@ -90,15 +90,15 @@ func (m *machine) match(i input, pos int) bool {
                 m.matchcap[i] = -1
         }
         runq, nextq := &m.q0, &m.q1
-       rune, rune1 := endOfText, endOfText
+       r, r1 := endOfText, endOfText
         width, width1 := 0, 0
-       rune, width = i.step(pos)
-       if rune != endOfText {
-               rune1, width1 = i.step(pos + width)
+       r, width = i.step(pos)
+       if r != endOfText {
+               r1, width1 = i.step(pos + width)
         }
         var flag syntax.EmptyOp
         if pos == 0 {
-               flag = syntax.EmptyOpContext(-1, rune)
+               flag = syntax.EmptyOpContext(-1, r)
         } else {
                 flag = i.context(pos)
         }
@@ -112,15 +112,15 @@ func (m *machine) match(i input, pos int) bool {
                                 // Have match; finished exploring alternatives.
                                 break
                         }
-                       if len(m.re.prefix) > 0 && rune1 != m.re.prefixRune && i.canCheckPrefix() {
+                       if len(m.re.prefix) > 0 && r1 != m.re.prefixRune && i.canCheckPrefix() {
                                 // Match requires literal prefix; fast search for it.
                                 advance := i.index(m.re, pos)
                                 if advance < 0 {
                                         break
                                 }
                                 pos += advance
-                               rune, width = i.step(pos)
-                               rune1, width1 = i.step(pos + width)
+                               r, width = i.step(pos)
+                               r1, width1 = i.step(pos + width)
                         }
                 }
                 if !m.matched {
@@ -129,8 +129,8 @@ func (m *machine) match(i input, pos int) bool {
                         }
                         m.add(runq, uint32(m.p.Start), pos, m.matchcap, flag, nil)
                 }
-               flag = syntax.EmptyOpContext(rune, rune1)
-               m.step(runq, nextq, pos, pos+width, rune, flag)
+               flag = syntax.EmptyOpContext(r, r1)
+               m.step(runq, nextq, pos, pos+width, r, flag)
                 if width == 0 {
                         break
                 }
@@ -140,9 +140,9 @@ func (m *machine) match(i input, pos int) bool {
                         break
                 }
                 pos += width
-               rune, width = rune1, width1
-               if rune != endOfText {
-                       rune1, width1 = i.step(pos + width)
+               r, width = r1, width1
+               if r != endOfText {
+                       r1, width1 = i.step(pos + width)
                 }
                 runq, nextq = nextq, runq
         }
@@ -166,7 +166,7 @@ func (m *machine) clear(q *queue) {
  // The step processes the rune c (which may be endOfText),
  // which starts at position pos and ends at nextPos.
  // nextCond gives the setting for the empty-width flags after c.
-func (m *machine) step(runq, nextq *queue, pos, nextPos, c int, nextCond syntax.EmptyOp) {
+func (m *machine) step(runq, nextq *queue, pos, nextPos int, c rune, nextCond syntax.EmptyOp) {
         longest := m.re.longest
         for j := 0; j < len(runq.dense); j++ {
                 d := &runq.dense[j]
diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go

index 2325f6204b19267e0328d91e3c461cefdfb7e4d2..a1b7951bfe8e1be908c59f65852d9b3528487073 100644 (file)
--- a/src/pkg/regexp/regexp.go
+++ b/src/pkg/regexp/regexp.go
@@ -83,7 +83,7 @@ type Regexp struct {
         prefix         string         // required prefix in unanchored matches
         prefixBytes    []byte         // prefix, as a []byte
         prefixComplete bool           // prefix is the entire regexp
-       prefixRune     int            // first rune in prefix
+       prefixRune     rune           // first rune in prefix
         cond           syntax.EmptyOp // empty-width conditions required at start of match
         numSubexp      int
         longest        bool
@@ -224,13 +224,13 @@ func (re *Regexp) NumSubexp() int {
         return re.numSubexp
  }
  
-const endOfText = -1
+const endOfText rune = -1
  
  // input abstracts different representations of the input text. It provides
  // one-character lookahead.
  type input interface {
-       step(pos int) (rune int, width int) // advance one rune
-       canCheckPrefix() bool               // can we look ahead without losing info?
+       step(pos int) (r rune, width int) // advance one rune
+       canCheckPrefix() bool             // can we look ahead without losing info?
         hasPrefix(re *Regexp) bool
         index(re *Regexp, pos int) int
         context(pos int) syntax.EmptyOp
@@ -245,11 +245,11 @@ func newInputString(str string) *inputString {
         return &inputString{str: str}
  }
  
-func (i *inputString) step(pos int) (int, int) {
+func (i *inputString) step(pos int) (rune, int) {
         if pos < len(i.str) {
                 c := i.str[pos]
                 if c < utf8.RuneSelf {
-                       return int(c), 1
+                       return rune(c), 1
                 }
                 return utf8.DecodeRuneInString(i.str[pos:])
         }
@@ -269,7 +269,7 @@ func (i *inputString) index(re *Regexp, pos int) int {
  }
  
  func (i *inputString) context(pos int) syntax.EmptyOp {
-       r1, r2 := -1, -1
+       r1, r2 := endOfText, endOfText
         if pos > 0 && pos <= len(i.str) {
                 r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
         }
@@ -288,11 +288,11 @@ func newInputBytes(str []byte) *inputBytes {
         return &inputBytes{str: str}
  }
  
-func (i *inputBytes) step(pos int) (int, int) {
+func (i *inputBytes) step(pos int) (rune, int) {
         if pos < len(i.str) {
                 c := i.str[pos]
                 if c < utf8.RuneSelf {
-                       return int(c), 1
+                       return rune(c), 1
                 }
                 return utf8.DecodeRune(i.str[pos:])
         }
@@ -312,7 +312,7 @@ func (i *inputBytes) index(re *Regexp, pos int) int {
  }
  
  func (i *inputBytes) context(pos int) syntax.EmptyOp {
-       r1, r2 := -1, -1
+       r1, r2 := endOfText, endOfText
         if pos > 0 && pos <= len(i.str) {
                 r1, _ = utf8.DecodeLastRune(i.str[:pos])
         }
@@ -333,7 +333,7 @@ func newInputReader(r io.RuneReader) *inputReader {
         return &inputReader{r: r}
  }
  
-func (i *inputReader) step(pos int) (int, int) {
+func (i *inputReader) step(pos int) (rune, int) {
         if !i.atEOT && pos != i.pos {
                 return endOfText, 0
  
diff --git a/src/pkg/regexp/syntax/compile.go b/src/pkg/regexp/syntax/compile.go

index c415d39a57e927a3918b46c0de6cb0440b33c348..c90de3fe99d7e0fba989ce7da68c19379dcb1a4b 100644 (file)
--- a/src/pkg/regexp/syntax/compile.go
+++ b/src/pkg/regexp/syntax/compile.go
@@ -91,8 +91,8 @@ func (c *compiler) init() {
         c.inst(InstFail)
  }
  
-var anyRuneNotNL = []int{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
-var anyRune = []int{0, unicode.MaxRune}
+var anyRuneNotNL = []rune{0, '\n' - 1, '\n' + 1, unicode.MaxRune}
+var anyRune = []rune{0, unicode.MaxRune}
  
  func (c *compiler) compile(re *Regexp) frag {
         switch re.Op {
@@ -262,12 +262,12 @@ func (c *compiler) empty(op EmptyOp) frag {
         return f
  }
  
-func (c *compiler) rune(rune []int, flags Flags) frag {
+func (c *compiler) rune(r []rune, flags Flags) frag {
         f := c.inst(InstRune)
         i := &c.p.Inst[f.i]
-       i.Rune = rune
+       i.Rune = r
         flags &= FoldCase // only relevant flag is FoldCase
-       if len(rune) != 1 || unicode.SimpleFold(rune[0]) == rune[0] {
+       if len(r) != 1 || unicode.SimpleFold(r[0]) == r[0] {
                 // and sometimes not even that
                 flags &^= FoldCase
         }
@@ -276,11 +276,11 @@ func (c *compiler) rune(rune []int, flags Flags) frag {
  
         // Special cases for exec machine.
         switch {
-       case flags&FoldCase == 0 && (len(rune) == 1 || len(rune) == 2 && rune[0] == rune[1]):
+       case flags&FoldCase == 0 && (len(r) == 1 || len(r) == 2 && r[0] == r[1]):
                 i.Op = InstRune1
-       case len(rune) == 2 && rune[0] == 0 && rune[1] == unicode.MaxRune:
+       case len(r) == 2 && r[0] == 0 && r[1] == unicode.MaxRune:
                 i.Op = InstRuneAny
-       case len(rune) == 4 && rune[0] == 0 && rune[1] == '\n'-1 && rune[2] == '\n'+1 && rune[3] == unicode.MaxRune:
+       case len(r) == 4 && r[0] == 0 && r[1] == '\n'-1 && r[2] == '\n'+1 && r[3] == unicode.MaxRune:
                 i.Op = InstRuneAnyNotNL
         }
  
diff --git a/src/pkg/regexp/syntax/make_perl_groups.pl b/src/pkg/regexp/syntax/make_perl_groups.pl

index 6d1b84b10033da42125dd6e8a8901c63cb2235ac..d024f5090e9e8b8304aac8dedecd1a47fc82f462 100755 (executable)
--- a/src/pkg/regexp/syntax/make_perl_groups.pl
+++ b/src/pkg/regexp/syntax/make_perl_groups.pl
@@ -57,7 +57,7 @@ sub ComputeClass($) {
  
  sub PrintClass($$@) {
    my ($cname, $name, @ranges) = @_;
-  print "var code$cname = []int{  /* $name */\n";
+  print "var code$cname = []rune{  /* $name */\n";
    for (my $i=0; $i<@ranges; $i++) {
      my @a = @{$ranges[$i]};
      printf "\t0x%x, 0x%x,\n", $a[0], $a[1];
diff --git a/src/pkg/regexp/syntax/parse.go b/src/pkg/regexp/syntax/parse.go

index 1165db3e221e901c6ce56f8c4aaa72b062424c2b..f5602628e70aab3c878b01cfb41a79ff980bad50 100644 (file)
--- a/src/pkg/regexp/syntax/parse.go
+++ b/src/pkg/regexp/syntax/parse.go
@@ -82,7 +82,7 @@ type parser struct {
         free        *Regexp
         numCap      int // number of capturing groups seen
         wholeRegexp string
-       tmpClass    []int // temporary char class work space
+       tmpClass    []rune // temporary char class work space
  }
  
  func (p *parser) newRegexp(op Op) *Regexp {
@@ -149,7 +149,7 @@ func (p *parser) push(re *Regexp) *Regexp {
  // If r >= 0 and there's a node left over, maybeConcat uses it
  // to push r with the given flags.
  // maybeConcat reports whether r was pushed.
-func (p *parser) maybeConcat(r int, flags Flags) bool {
+func (p *parser) maybeConcat(r rune, flags Flags) bool {
         n := len(p.stack)
         if n < 2 {
                 return false
@@ -178,7 +178,7 @@ func (p *parser) maybeConcat(r int, flags Flags) bool {
  }
  
  // newLiteral returns a new OpLiteral Regexp with the given flags
-func (p *parser) newLiteral(r int, flags Flags) *Regexp {
+func (p *parser) newLiteral(r rune, flags Flags) *Regexp {
         re := p.newRegexp(OpLiteral)
         re.Flags = flags
         if flags&FoldCase != 0 {
@@ -190,7 +190,7 @@ func (p *parser) newLiteral(r int, flags Flags) *Regexp {
  }
  
  // minFoldRune returns the minimum rune fold-equivalent to r.
-func minFoldRune(r int) int {
+func minFoldRune(r rune) rune {
         if r < minFold || r > maxFold {
                 return r
         }
@@ -206,7 +206,7 @@ func minFoldRune(r int) int {
  
  // literal pushes a literal regexp for the rune r on the stack
  // and returns that regexp.
-func (p *parser) literal(r int) {
+func (p *parser) literal(r rune) {
         p.push(p.newLiteral(r, p.flags))
  }
  
@@ -369,7 +369,7 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
         }
  
         // Round 1: Factor out common literal prefixes.
-       var str []int
+       var str []rune
         var strflags Flags
         start := 0
         out := sub[:0]
@@ -380,7 +380,7 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
                 //
                 // Invariant: sub[start:i] consists of regexps that all begin
                 // with str as modified by strflags.
-               var istr []int
+               var istr []rune
                 var iflags Flags
                 if i < len(sub) {
                         istr, iflags = p.leadingString(sub[i])
@@ -543,7 +543,7 @@ func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
  
  // leadingString returns the leading literal string that re begins with.
  // The string refers to storage in re or its children.
-func (p *parser) leadingString(re *Regexp) ([]int, Flags) {
+func (p *parser) leadingString(re *Regexp) ([]rune, Flags) {
         if re.Op == OpConcat && len(re.Sub) > 0 {
                 re = re.Sub[0]
         }
@@ -639,7 +639,7 @@ func literalRegexp(s string, flags Flags) *Regexp {
         for _, c := range s {
                 if len(re.Rune) >= cap(re.Rune) {
                         // string is too long to fit in Rune0.  let Go handle it
-                       re.Rune = []int(s)
+                       re.Rune = []rune(s)
                         break
                 }
                 re.Rune = append(re.Rune, c)
@@ -662,7 +662,7 @@ func Parse(s string, flags Flags) (*Regexp, os.Error) {
         var (
                 p          parser
                 err        os.Error
-               c          int
+               c          rune
                 op         Op
                 lastRepeat string
                 min, max   int
@@ -935,7 +935,7 @@ func (p *parser) parsePerlFlags(s string) (rest string, err os.Error) {
         }
  
         // Non-capturing group.  Might also twiddle Perl flags.
-       var c int
+       var c rune
         t = t[2:] // skip (?
         flags := p.flags
         sign := +1
@@ -1049,7 +1049,7 @@ func isCharClass(re *Regexp) bool {
  }
  
  // does re match r?
-func matchRune(re *Regexp, r int) bool {
+func matchRune(re *Regexp, r rune) bool {
         switch re.Op {
         case OpLiteral:
                 return len(re.Rune) == 1 && re.Rune[0] == r
@@ -1186,7 +1186,7 @@ func (p *parser) parseRightParen() os.Error {
  
  // parseEscape parses an escape sequence at the beginning of s
  // and returns the rune.
-func (p *parser) parseEscape(s string) (r int, rest string, err os.Error) {
+func (p *parser) parseEscape(s string) (r rune, rest string, err os.Error) {
         t := s[1:]
         if t == "" {
                 return 0, "", &Error{ErrTrailingBackslash, ""}
@@ -1221,7 +1221,7 @@ Switch:
                         if t == "" || t[0] < '0' || t[0] > '7' {
                                 break
                         }
-                       r = r*8 + int(t[0]) - '0'
+                       r = r*8 + rune(t[0]) - '0'
                         t = t[1:]
                 }
                 return r, t, nil
@@ -1302,7 +1302,7 @@ Switch:
  
  // parseClassChar parses a character class character at the beginning of s
  // and returns it.
-func (p *parser) parseClassChar(s, wholeClass string) (r int, rest string, err os.Error) {
+func (p *parser) parseClassChar(s, wholeClass string) (r rune, rest string, err os.Error) {
         if s == "" {
                 return 0, "", &Error{Code: ErrMissingBracket, Expr: wholeClass}
         }
@@ -1318,13 +1318,13 @@ func (p *parser) parseClassChar(s, wholeClass string) (r int, rest string, err o
  
  type charGroup struct {
         sign  int
-       class []int
+       class []rune
  }
  
  // parsePerlClassEscape parses a leading Perl character class escape like \d
  // from the beginning of s.  If one is present, it appends the characters to r
  // and returns the new slice r and the remainder of the string.
-func (p *parser) parsePerlClassEscape(s string, r []int) (out []int, rest string) {
+func (p *parser) parsePerlClassEscape(s string, r []rune) (out []rune, rest string) {
         if p.flags&PerlX == 0 || len(s) < 2 || s[0] != '\\' {
                 return
         }
@@ -1338,7 +1338,7 @@ func (p *parser) parsePerlClassEscape(s string, r []int) (out []int, rest string
  // parseNamedClass parses a leading POSIX named character class like [:alnum:]
  // from the beginning of s.  If one is present, it appends the characters to r
  // and returns the new slice r and the remainder of the string.
-func (p *parser) parseNamedClass(s string, r []int) (out []int, rest string, err os.Error) {
+func (p *parser) parseNamedClass(s string, r []rune) (out []rune, rest string, err os.Error) {
         if len(s) < 2 || s[0] != '[' || s[1] != ':' {
                 return
         }
@@ -1356,7 +1356,7 @@ func (p *parser) parseNamedClass(s string, r []int) (out []int, rest string, err
         return p.appendGroup(r, g), s, nil
  }
  
-func (p *parser) appendGroup(r []int, g charGroup) []int {
+func (p *parser) appendGroup(r []rune, g charGroup) []rune {
         if p.flags&FoldCase == 0 {
                 if g.sign < 0 {
                         r = appendNegatedClass(r, g.class)
@@ -1401,7 +1401,7 @@ func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) {
  // parseUnicodeClass parses a leading Unicode character class like \p{Han}
  // from the beginning of s.  If one is present, it appends the characters to r
  // and returns the new slice r and the remainder of the string.
-func (p *parser) parseUnicodeClass(s string, r []int) (out []int, rest string, err os.Error) {
+func (p *parser) parseUnicodeClass(s string, r []rune) (out []rune, rest string, err os.Error) {
         if p.flags&UnicodeGroups == 0 || len(s) < 2 || s[0] != '\\' || s[1] != 'p' && s[1] != 'P' {
                 return
         }
@@ -1533,7 +1533,7 @@ func (p *parser) parseClass(s string) (rest string, err os.Error) {
  
                 // Single character or simple range.
                 rng := t
-               var lo, hi int
+               var lo, hi rune
                 if lo, t, err = p.parseClassChar(t, s); err != nil {
                         return "", err
                 }
@@ -1570,7 +1570,7 @@ func (p *parser) parseClass(s string) (rest string, err os.Error) {
  
  // cleanClass sorts the ranges (pairs of elements of r),
  // merges them, and eliminates duplicates.
-func cleanClass(rp *[]int) []int {
+func cleanClass(rp *[]rune) []rune {
  
         // Sort by lo increasing, hi decreasing to break ties.
         sort.Sort(ranges{rp})
@@ -1601,7 +1601,7 @@ func cleanClass(rp *[]int) []int {
  }
  
  // appendLiteral returns the result of appending the literal x to the class r.
-func appendLiteral(r []int, x int, flags Flags) []int {
+func appendLiteral(r []rune, x rune, flags Flags) []rune {
         if flags&FoldCase != 0 {
                 return appendFoldedRange(r, x, x)
         }
@@ -1609,7 +1609,7 @@ func appendLiteral(r []int, x int, flags Flags) []int {
  }
  
  // appendRange returns the result of appending the range lo-hi to the class r.
-func appendRange(r []int, lo, hi int) []int {
+func appendRange(r []rune, lo, hi rune) []rune {
         // Expand last range or next to last range if it overlaps or abuts.
         // Checking two ranges helps when appending case-folded
         // alphabets, so that one range can be expanding A-Z and the
@@ -1642,7 +1642,7 @@ const (
  
  // appendFoldedRange returns the result of appending the range lo-hi
  // and its case folding-equivalent runes to the class r.
-func appendFoldedRange(r []int, lo, hi int) []int {
+func appendFoldedRange(r []rune, lo, hi rune) []rune {
         // Optimizations.
         if lo <= minFold && hi >= maxFold {
                 // Range is full: folding can't add more.
@@ -1677,7 +1677,7 @@ func appendFoldedRange(r []int, lo, hi int) []int {
  
  // appendClass returns the result of appending the class x to the class r.
  // It assume x is clean.
-func appendClass(r []int, x []int) []int {
+func appendClass(r []rune, x []rune) []rune {
         for i := 0; i < len(x); i += 2 {
                 r = appendRange(r, x[i], x[i+1])
         }
@@ -1685,7 +1685,7 @@ func appendClass(r []int, x []int) []int {
  }
  
  // appendFolded returns the result of appending the case folding of the class x to the class r.
-func appendFoldedClass(r []int, x []int) []int {
+func appendFoldedClass(r []rune, x []rune) []rune {
         for i := 0; i < len(x); i += 2 {
                 r = appendFoldedRange(r, x[i], x[i+1])
         }
@@ -1694,8 +1694,8 @@ func appendFoldedClass(r []int, x []int) []int {
  
  // appendNegatedClass returns the result of appending the negation of the class x to the class r.
  // It assumes x is clean.
-func appendNegatedClass(r []int, x []int) []int {
-       nextLo := 0
+func appendNegatedClass(r []rune, x []rune) []rune {
+       nextLo := rune('\u0000')
         for i := 0; i < len(x); i += 2 {
                 lo, hi := x[i], x[i+1]
                 if nextLo <= lo-1 {
@@ -1710,9 +1710,9 @@ func appendNegatedClass(r []int, x []int) []int {
  }
  
  // appendTable returns the result of appending x to the class r.
-func appendTable(r []int, x *unicode.RangeTable) []int {
+func appendTable(r []rune, x *unicode.RangeTable) []rune {
         for _, xr := range x.R16 {
-               lo, hi, stride := int(xr.Lo), int(xr.Hi), int(xr.Stride)
+               lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
                 if stride == 1 {
                         r = appendRange(r, lo, hi)
                         continue
@@ -1722,7 +1722,7 @@ func appendTable(r []int, x *unicode.RangeTable) []int {
                 }
         }
         for _, xr := range x.R32 {
-               lo, hi, stride := int(xr.Lo), int(xr.Hi), int(xr.Stride)
+               lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
                 if stride == 1 {
                         r = appendRange(r, lo, hi)
                         continue
@@ -1735,10 +1735,10 @@ func appendTable(r []int, x *unicode.RangeTable) []int {
  }
  
  // appendNegatedTable returns the result of appending the negation of x to the class r.
-func appendNegatedTable(r []int, x *unicode.RangeTable) []int {
-       nextLo := 0 // lo end of next class to add
+func appendNegatedTable(r []rune, x *unicode.RangeTable) []rune {
+       nextLo := rune('\u0000') // lo end of next class to add
         for _, xr := range x.R16 {
-               lo, hi, stride := int(xr.Lo), int(xr.Hi), int(xr.Stride)
+               lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
                 if stride == 1 {
                         if nextLo <= lo-1 {
                                 r = appendRange(r, nextLo, lo-1)
@@ -1754,7 +1754,7 @@ func appendNegatedTable(r []int, x *unicode.RangeTable) []int {
                 }
         }
         for _, xr := range x.R32 {
-               lo, hi, stride := int(xr.Lo), int(xr.Hi), int(xr.Stride)
+               lo, hi, stride := rune(xr.Lo), rune(xr.Hi), rune(xr.Stride)
                 if stride == 1 {
                         if nextLo <= lo-1 {
                                 r = appendRange(r, nextLo, lo-1)
@@ -1777,9 +1777,9 @@ func appendNegatedTable(r []int, x *unicode.RangeTable) []int {
  
  // negateClass overwrites r and returns r's negation.
  // It assumes the class r is already clean.
-func negateClass(r []int) []int {
-       nextLo := 0 // lo end of next class to add
-       w := 0      // write index
+func negateClass(r []rune) []rune {
+       nextLo := rune('\u0000') // lo end of next class to add
+       w := 0                   // write index
         for i := 0; i < len(r); i += 2 {
                 lo, hi := r[i], r[i+1]
                 if nextLo <= lo-1 {
@@ -1801,9 +1801,9 @@ func negateClass(r []int) []int {
  // ranges implements sort.Interface on a []rune.
  // The choice of receiver type definition is strange
  // but avoids an allocation since we already have
-// a *[]int.
+// a *[]rune.
  type ranges struct {
-       p *[]int
+       p *[]rune
  }
  
  func (ra ranges) Less(i, j int) bool {
@@ -1835,7 +1835,7 @@ func checkUTF8(s string) os.Error {
         return nil
  }
  
-func nextRune(s string) (c int, t string, err os.Error) {
+func nextRune(s string) (c rune, t string, err os.Error) {
         c, size := utf8.DecodeRuneInString(s)
         if c == utf8.RuneError && size == 1 {
                 return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s}
@@ -1843,11 +1843,11 @@ func nextRune(s string) (c int, t string, err os.Error) {
         return c, s[size:], nil
  }
  
-func isalnum(c int) bool {
+func isalnum(c rune) bool {
         return '0' <= c && c <= '9' || 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
  }
  
-func unhex(c int) int {
+func unhex(c rune) rune {
         if '0' <= c && c <= '9' {
                 return c - '0'
         }
diff --git a/src/pkg/regexp/syntax/parse_test.go b/src/pkg/regexp/syntax/parse_test.go

index f20276c59ab6d2ecde31fc2a57dd4a904f00e1bb..c6e63392c9c2b8f50733a00c323086f0037bd39b 100644 (file)
--- a/src/pkg/regexp/syntax/parse_test.go
+++ b/src/pkg/regexp/syntax/parse_test.go
@@ -371,10 +371,10 @@ func dumpRegexp(b *bytes.Buffer, re *Regexp) {
         b.WriteByte('}')
  }
  
-func mkCharClass(f func(int) bool) string {
+func mkCharClass(f func(rune) bool) string {
         re := &Regexp{Op: OpCharClass}
-       lo := -1
-       for i := 0; i <= unicode.MaxRune; i++ {
+       lo := rune(-1)
+       for i := rune(0); i <= unicode.MaxRune; i++ {
                 if f(i) {
                         if lo < 0 {
                                 lo = i
@@ -392,12 +392,12 @@ func mkCharClass(f func(int) bool) string {
         return dump(re)
  }
  
-func isUpperFold(rune int) bool {
-       if unicode.IsUpper(rune) {
+func isUpperFold(r rune) bool {
+       if unicode.IsUpper(r) {
                 return true
         }
-       c := unicode.SimpleFold(rune)
-       for c != rune {
+       c := unicode.SimpleFold(r)
+       for c != r {
                 if unicode.IsUpper(c) {
                         return true
                 }
@@ -407,8 +407,8 @@ func isUpperFold(rune int) bool {
  }
  
  func TestFoldConstants(t *testing.T) {
-       last := -1
-       for i := 0; i <= unicode.MaxRune; i++ {
+       last := rune(-1)
+       for i := rune(0); i <= unicode.MaxRune; i++ {
                 if unicode.SimpleFold(i) == i {
                         continue
                 }
@@ -427,8 +427,8 @@ func TestAppendRangeCollapse(t *testing.T) {
         // into the earlier ones (it looks back two ranges), so that
         // the slice never grows very large.
         // Note that we are not calling cleanClass.
-       var r []int
-       for i := 'A'; i <= 'Z'; i++ {
+       var r []rune
+       for i := rune('A'); i <= 'Z'; i++ {
                 r = appendRange(r, i, i)
                 r = appendRange(r, i+'a'-'A', i+'a'-'A')
         }
diff --git a/src/pkg/regexp/syntax/perl_groups.go b/src/pkg/regexp/syntax/perl_groups.go

index 05b392c40d80efd2186b3c16154b940c1d0efd34..1a11ca62f0c0de7d7748110dd0a76482798597c7 100644 (file)
--- a/src/pkg/regexp/syntax/perl_groups.go
+++ b/src/pkg/regexp/syntax/perl_groups.go
@@ -3,17 +3,17 @@
  
  package syntax
  
-var code1 = []int{ /* \d */
+var code1 = []rune{ /* \d */
         0x30, 0x39,
  }
  
-var code2 = []int{ /* \s */
+var code2 = []rune{ /* \s */
         0x9, 0xa,
         0xc, 0xd,
         0x20, 0x20,
  }
  
-var code3 = []int{ /* \w */
+var code3 = []rune{ /* \w */
         0x30, 0x39,
         0x41, 0x5a,
         0x5f, 0x5f,
@@ -28,71 +28,71 @@ var perlGroup = map[string]charGroup{
         `\w`: {+1, code3},
         `\W`: {-1, code3},
  }
-var code4 = []int{ /* [:alnum:] */
+var code4 = []rune{ /* [:alnum:] */
         0x30, 0x39,
         0x41, 0x5a,
         0x61, 0x7a,
  }
  
-var code5 = []int{ /* [:alpha:] */
+var code5 = []rune{ /* [:alpha:] */
         0x41, 0x5a,
         0x61, 0x7a,
  }
  
-var code6 = []int{ /* [:ascii:] */
+var code6 = []rune{ /* [:ascii:] */
         0x0, 0x7f,
  }
  
-var code7 = []int{ /* [:blank:] */
+var code7 = []rune{ /* [:blank:] */
         0x9, 0x9,
         0x20, 0x20,
  }
  
-var code8 = []int{ /* [:cntrl:] */
+var code8 = []rune{ /* [:cntrl:] */
         0x0, 0x1f,
         0x7f, 0x7f,
  }
  
-var code9 = []int{ /* [:digit:] */
+var code9 = []rune{ /* [:digit:] */
         0x30, 0x39,
  }
  
-var code10 = []int{ /* [:graph:] */
+var code10 = []rune{ /* [:graph:] */
         0x21, 0x7e,
  }
  
-var code11 = []int{ /* [:lower:] */
+var code11 = []rune{ /* [:lower:] */
         0x61, 0x7a,
  }
  
-var code12 = []int{ /* [:print:] */
+var code12 = []rune{ /* [:print:] */
         0x20, 0x7e,
  }
  
-var code13 = []int{ /* [:punct:] */
+var code13 = []rune{ /* [:punct:] */
         0x21, 0x2f,
         0x3a, 0x40,
         0x5b, 0x60,
         0x7b, 0x7e,
  }
  
-var code14 = []int{ /* [:space:] */
+var code14 = []rune{ /* [:space:] */
         0x9, 0xd,
         0x20, 0x20,
  }
  
-var code15 = []int{ /* [:upper:] */
+var code15 = []rune{ /* [:upper:] */
         0x41, 0x5a,
  }
  
-var code16 = []int{ /* [:word:] */
+var code16 = []rune{ /* [:word:] */
         0x30, 0x39,
         0x41, 0x5a,
         0x5f, 0x5f,
         0x61, 0x7a,
  }
  
-var code17 = []int{ /* [:xdigit:] */
+var code17 = []rune{ /* [:xdigit:] */
         0x30, 0x39,
         0x41, 0x46,
         0x61, 0x66,
diff --git a/src/pkg/regexp/syntax/prog.go b/src/pkg/regexp/syntax/prog.go

index ced45da077b8316b2db5bec58e012f3941623684..f5b697a59ae02693a8c30e02c10e5cf814e1d59c 100644 (file)
--- a/src/pkg/regexp/syntax/prog.go
+++ b/src/pkg/regexp/syntax/prog.go
@@ -51,7 +51,7 @@ const (
  // at the beginning of the text.
  // Passing r2 == -1 indicates that the position is
  // at the end of the text.
-func EmptyOpContext(r1, r2 int) EmptyOp {
+func EmptyOpContext(r1, r2 rune) EmptyOp {
         var op EmptyOp
         if r1 < 0 {
                 op |= EmptyBeginText | EmptyBeginLine
@@ -76,7 +76,7 @@ func EmptyOpContext(r1, r2 int) EmptyOp {
  // IsWordChar reports whether r is consider a ``word character''
  // during the evaluation of the \b and \B zero-width assertions.
  // These assertions are ASCII-only: the word characters are [A-Za-z0-9_].
-func IsWordChar(r int) bool {
+func IsWordChar(r rune) bool {
         return 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' || '0' <= r && r <= '9' || r == '_'
  }
  
@@ -85,7 +85,7 @@ type Inst struct {
         Op   InstOp
         Out  uint32 // all but InstMatch, InstFail
         Arg  uint32 // InstAlt, InstAltMatch, InstCapture, InstEmptyWidth
-       Rune []int
+       Rune []rune
  }
  
  func (p *Prog) String() string {
@@ -161,7 +161,7 @@ Loop:
  
  // MatchRune returns true if the instruction matches (and consumes) r.
  // It should only be called when i.Op == InstRune.
-func (i *Inst) MatchRune(r int) bool {
+func (i *Inst) MatchRune(r rune) bool {
         rune := i.Rune
  
         // Special case: single-rune slice is from literal string, not char class.
@@ -210,17 +210,17 @@ func (i *Inst) MatchRune(r int) bool {
  
  // As per re2's Prog::IsWordChar. Determines whether rune is an ASCII word char.
  // Since we act on runes, it would be easy to support Unicode here.
-func wordRune(rune int) bool {
-       return rune == '_' ||
-               ('A' <= rune && rune <= 'Z') ||
-               ('a' <= rune && rune <= 'z') ||
-               ('0' <= rune && rune <= '9')
+func wordRune(r rune) bool {
+       return r == '_' ||
+               ('A' <= r && r <= 'Z') ||
+               ('a' <= r && r <= 'z') ||
+               ('0' <= r && r <= '9')
  }
  
  // MatchEmptyWidth returns true if the instruction matches
  // an empty string between the runes before and after.
  // It should only be called when i.Op == InstEmptyWidth.
-func (i *Inst) MatchEmptyWidth(before int, after int) bool {
+func (i *Inst) MatchEmptyWidth(before rune, after rune) bool {
         switch EmptyOp(i.Arg) {
         case EmptyBeginLine:
                 return before == '\n' || before == -1
diff --git a/src/pkg/regexp/syntax/regexp.go b/src/pkg/regexp/syntax/regexp.go

index 033848df28a780c2f10e6965b9d3f3de04f29a9b..b5ddab1d16bbe5258765ea8453c49447c008b4dd 100644 (file)
--- a/src/pkg/regexp/syntax/regexp.go
+++ b/src/pkg/regexp/syntax/regexp.go
@@ -22,8 +22,8 @@ type Regexp struct {
         Flags    Flags
         Sub      []*Regexp  // subexpressions, if any
         Sub0     [1]*Regexp // storage for short Sub
-       Rune     []int      // matched runes, for OpLiteral, OpCharClass
-       Rune0    [2]int     // storage for short Rune
+       Rune     []rune     // matched runes, for OpLiteral, OpCharClass
+       Rune0    [2]rune    // storage for short Rune
         Min, Max int        // min, max for OpRepeat
         Cap      int        // capturing index, for OpCapture
         Name     string     // capturing name, for OpCapture
@@ -252,7 +252,7 @@ func (re *Regexp) String() string {
  
  const meta = `\.+*?()|[]{}^$`
  
-func escape(b *bytes.Buffer, r int, force bool) {
+func escape(b *bytes.Buffer, r rune, force bool) {
         if unicode.IsPrint(r) {
                 if strings.IndexRune(meta, r) >= 0 || force {
                         b.WriteRune('\\')
@@ -277,7 +277,7 @@ func escape(b *bytes.Buffer, r int, force bool) {
         default:
                 if r < 0x100 {
                         b.WriteString(`\x`)
-                       s := strconv.Itob(r, 16)
+                       s := strconv.Itob(int(r), 16)
                         if len(s) == 1 {
                                 b.WriteRune('0')
                         }
@@ -285,7 +285,7 @@ func escape(b *bytes.Buffer, r int, force bool) {
                         break
                 }
                 b.WriteString(`\x{`)
-               b.WriteString(strconv.Itob(r, 16))
+               b.WriteString(strconv.Itob(int(r), 16))
                 b.WriteString(`}`)
         }
  }
author	Russ Cox <rsc@golang.org>
	Wed, 26 Oct 2011 05:20:57 +0000 (22:20 -0700)
committer	Russ Cox <rsc@golang.org>
	Wed, 26 Oct 2011 05:20:57 +0000 (22:20 -0700)
src/pkg/regexp/exec.go		patch \| blob \| history
src/pkg/regexp/regexp.go		patch \| blob \| history
src/pkg/regexp/syntax/compile.go		patch \| blob \| history
src/pkg/regexp/syntax/make_perl_groups.pl		patch \| blob \| history
src/pkg/regexp/syntax/parse.go		patch \| blob \| history
src/pkg/regexp/syntax/parse_test.go		patch \| blob \| history
src/pkg/regexp/syntax/perl_groups.go		patch \| blob \| history
src/pkg/regexp/syntax/prog.go		patch \| blob \| history
src/pkg/regexp/syntax/regexp.go		patch \| blob \| history