regexp: interpret all Go character escapes \a \b \f \n \r \t \v

author Rob Pike <r@golang.org>

Mon, 30 Aug 2010 04:06:59 +0000 (14:06 +1000)

committer Rob Pike <r@golang.org>

Mon, 30 Aug 2010 04:06:59 +0000 (14:06 +1000)
author Rob Pike <r@golang.org>
Mon, 30 Aug 2010 04:06:59 +0000 (14:06 +1000)
committer Rob Pike <r@golang.org>
Mon, 30 Aug 2010 04:06:59 +0000 (14:06 +1000)
diff --git a/src/pkg/regexp/find_test.go b/src/pkg/regexp/find_test.go

index d0aad82b71dda30422cb04133363749df892895c..6a34cabf5b1cbedf842f933688a09f4cbf1fd5d9 100644 (file)
--- a/src/pkg/regexp/find_test.go
+++ b/src/pkg/regexp/find_test.go
@@ -57,6 +57,8 @@ var findTests = []FindTest{
         FindTest{`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
         FindTest{`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
         FindTest{`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
+       FindTest{`\a\b\f\n\r\t\v`, "\a\b\f\n\r\t\v", build(1, 0, 7)},
+       FindTest{`[\a\b\f\n\r\t\v]+`, "\a\b\f\n\r\t\v", build(1, 0, 7)},
  
         FindTest{`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
         FindTest{`(.*).*`, "ab", build(1, 0, 2, 0, 2)},
diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go

index fce76953e3f4f7c40514c71624591720ddb24dea..f3e07d74a4b6792082f42f384522437544c8cb00 100644 (file)
--- a/src/pkg/regexp/regexp.go
+++ b/src/pkg/regexp/regexp.go
@@ -22,7 +22,8 @@
  //             character [ '-' character ]
  //
  // All characters are UTF-8-encoded code points.  Backslashes escape special
-// characters, including inside character classes.
+// characters, including inside character classes.  The standard Go character
+// escapes are also recognized: \a \b \f \n \r \t \v.
  //
  // There are 16 methods of Regexp that match a regular expression and identify
  // the matched text.  Their names are matched by this regular expression:
@@ -353,6 +354,18 @@ func ispunct(c int) bool {
         return false
  }
  
+var escapes = []byte("abfnrtv")
+var escaped = []byte("\a\b\f\n\r\t\v")
+
+func escape(c int) int {
+       for i, b := range escapes {
+               if int(b) == c {
+                       return i
+               }
+       }
+       return -1
+}
+
  func (p *parser) charClass() instr {
         cc := newCharClass()
         if p.c() == '^' {
@@ -388,10 +401,10 @@ func (p *parser) charClass() instr {
                         switch {
                         case c == endOfFile:
                                 p.error(ErrExtraneousBackslash)
-                       case c == 'n':
-                               c = '\n'
                         case ispunct(c):
                                 // c is as delivered
+                       case escape(c) >= 0:
+                               c = int(escaped[escape(c)])
                         default:
                                 p.error(ErrBadBackslash)
                         }
@@ -483,10 +496,10 @@ func (p *parser) term() (start, end instr) {
                 switch {
                 case c == endOfFile:
                         p.error(ErrExtraneousBackslash)
-               case c == 'n':
-                       c = '\n'
                 case ispunct(c):
                         // c is as delivered
+               case escape(c) >= 0:
+                       c = int(escaped[escape(c)])
                 default:
                         p.error(ErrBadBackslash)
                 }
author	Rob Pike <r@golang.org>
	Mon, 30 Aug 2010 04:06:59 +0000 (14:06 +1000)
committer	Rob Pike <r@golang.org>
	Mon, 30 Aug 2010 04:06:59 +0000 (14:06 +1000)
src/pkg/regexp/find_test.go		patch \| blob \| history
src/pkg/regexp/regexp.go		patch \| blob \| history