regexp: fix performance bug, make anchored searches fail fast.

author Rob Pike <r@golang.org>

Mon, 3 Jan 2011 19:31:51 +0000 (11:31 -0800)

committer Rob Pike <r@golang.org>

Mon, 3 Jan 2011 19:31:51 +0000 (11:31 -0800)
author Rob Pike <r@golang.org>
Mon, 3 Jan 2011 19:31:51 +0000 (11:31 -0800)
committer Rob Pike <r@golang.org>
Mon, 3 Jan 2011 19:31:51 +0000 (11:31 -0800)
diff --git a/src/pkg/regexp/all_test.go b/src/pkg/regexp/all_test.go

index 8f115aa49ccb6a2a98283e6f766f5b318a33c241..3b2c489bcef5f105f5d9d8b002c5b5649985683c 100644 (file)
--- a/src/pkg/regexp/all_test.go
+++ b/src/pkg/regexp/all_test.go
@@ -377,3 +377,49 @@ func BenchmarkReplaceAll(b *testing.B) {
                 re.ReplaceAllString(x, "")
         }
  }
+
+func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) {
+       b.StopTimer()
+       x := []byte("abcdefghijklmnopqrstuvwxyz")
+       re := MustCompile("^zbc(d|e)")
+       b.StartTimer()
+       for i := 0; i < b.N; i++ {
+               re.Match(x)
+       }
+}
+
+func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) {
+       b.StopTimer()
+       x := []byte("abcdefghijklmnopqrstuvwxyz")
+       for i := 0; i < 15; i++ {
+               x = append(x, x...)
+       }
+       re := MustCompile("^zbc(d|e)")
+       b.StartTimer()
+       for i := 0; i < b.N; i++ {
+               re.Match(x)
+       }
+}
+
+func BenchmarkAnchoredShortMatch(b *testing.B) {
+       b.StopTimer()
+       x := []byte("abcdefghijklmnopqrstuvwxyz")
+       re := MustCompile("^.bc(d|e)")
+       b.StartTimer()
+       for i := 0; i < b.N; i++ {
+               re.Match(x)
+       }
+}
+
+func BenchmarkAnchoredLongMatch(b *testing.B) {
+       b.StopTimer()
+       x := []byte("abcdefghijklmnopqrstuvwxyz")
+       for i := 0; i < 15; i++ {
+               x = append(x, x...)
+       }
+       re := MustCompile("^.bc(d|e)")
+       b.StartTimer()
+       for i := 0; i < b.N; i++ {
+               re.Match(x)
+       }
+}
diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go

index ef6a8aa0ba9aa010db2fe9a3f791f7dbc93be688..4d13fad8b3c67df0ce292125a77505e6c6d8bbf9 100644 (file)
--- a/src/pkg/regexp/regexp.go
+++ b/src/pkg/regexp/regexp.go
@@ -571,15 +571,20 @@ func (re *Regexp) doParse() {
         }
  }
  
-// Extract regular text from the beginning of the pattern.
+// Extract regular text from the beginning of the pattern,
+// possibly after a leading iBOT.
  // That text can be used by doExecute to speed up matching.
  func (re *Regexp) setPrefix() {
         var b []byte
         var utf = make([]byte, utf8.UTFMax)
         var inst *instr
-       // First instruction is start; skip that.
+       // First instruction is start; skip that.  Also skip any initial iBOT.
+       inst = re.inst[0].next
+       for inst.kind == iBOT {
+               inst = inst.next
+       }
  Loop:
-       for inst = re.inst[0].next; inst.kind != iEnd; inst = inst.next {
+       for ; inst.kind != iEnd; inst = inst.next {
                 // stop if this is not a char
                 if inst.kind != iChar {
                         break
@@ -748,14 +753,30 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
         if bytestr != nil {
                 end = len(bytestr)
         }
+       anchored := re.inst[0].next.kind == iBOT
+       if anchored && pos > 0 {
+               return nil
+       }
         // fast check for initial plain substring
         prefixed := false // has this iteration begun by skipping a prefix?
         if re.prefix != "" {
-               var advance int
-               if bytestr == nil {
-                       advance = strings.Index(str[pos:], re.prefix)
+               advance := 0
+               if anchored {
+                       if bytestr == nil {
+                               if !strings.HasPrefix(str, re.prefix) {
+                                       return nil
+                               }
+                       } else {
+                               if !bytes.HasPrefix(bytestr, re.prefixBytes) {
+                                       return nil
+                               }
+                       }
                 } else {
-                       advance = bytes.Index(bytestr[pos:], re.prefixBytes)
+                       if bytestr == nil {
+                               advance = strings.Index(str[pos:], re.prefix)
+                       } else {
+                               advance = bytes.Index(bytestr[pos:], re.prefixBytes)
+                       }
                 }
                 if advance == -1 {
                         return nil
author	Rob Pike <r@golang.org>
	Mon, 3 Jan 2011 19:31:51 +0000 (11:31 -0800)
committer	Rob Pike <r@golang.org>
	Mon, 3 Jan 2011 19:31:51 +0000 (11:31 -0800)
src/pkg/regexp/all_test.go		patch | blob | history
src/pkg/regexp/regexp.go		patch | blob | history