]> Cypherpunks repositories - gostls13.git/commitdiff
regexp: fix performance bug, make anchored searches fail fast.
authorRob Pike <r@golang.org>
Mon, 3 Jan 2011 19:31:51 +0000 (11:31 -0800)
committerRob Pike <r@golang.org>
Mon, 3 Jan 2011 19:31:51 +0000 (11:31 -0800)
The bug was that for an anchored pattern such as ^x, the prefix
scan ignored the anchor, and could scan the whole file if there was
no x present.  The fix is to do prefix matching after the anchor;
the cost miniscule; the speedups huge.

R=rsc, gri
CC=golang-dev
https://golang.org/cl/3837042

src/pkg/regexp/all_test.go
src/pkg/regexp/regexp.go

index 8f115aa49ccb6a2a98283e6f766f5b318a33c241..3b2c489bcef5f105f5d9d8b002c5b5649985683c 100644 (file)
@@ -377,3 +377,49 @@ func BenchmarkReplaceAll(b *testing.B) {
                re.ReplaceAllString(x, "")
        }
 }
+
+func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) {
+       b.StopTimer()
+       x := []byte("abcdefghijklmnopqrstuvwxyz")
+       re := MustCompile("^zbc(d|e)")
+       b.StartTimer()
+       for i := 0; i < b.N; i++ {
+               re.Match(x)
+       }
+}
+
+func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) {
+       b.StopTimer()
+       x := []byte("abcdefghijklmnopqrstuvwxyz")
+       for i := 0; i < 15; i++ {
+               x = append(x, x...)
+       }
+       re := MustCompile("^zbc(d|e)")
+       b.StartTimer()
+       for i := 0; i < b.N; i++ {
+               re.Match(x)
+       }
+}
+
+func BenchmarkAnchoredShortMatch(b *testing.B) {
+       b.StopTimer()
+       x := []byte("abcdefghijklmnopqrstuvwxyz")
+       re := MustCompile("^.bc(d|e)")
+       b.StartTimer()
+       for i := 0; i < b.N; i++ {
+               re.Match(x)
+       }
+}
+
+func BenchmarkAnchoredLongMatch(b *testing.B) {
+       b.StopTimer()
+       x := []byte("abcdefghijklmnopqrstuvwxyz")
+       for i := 0; i < 15; i++ {
+               x = append(x, x...)
+       }
+       re := MustCompile("^.bc(d|e)")
+       b.StartTimer()
+       for i := 0; i < b.N; i++ {
+               re.Match(x)
+       }
+}
index ef6a8aa0ba9aa010db2fe9a3f791f7dbc93be688..4d13fad8b3c67df0ce292125a77505e6c6d8bbf9 100644 (file)
@@ -571,15 +571,20 @@ func (re *Regexp) doParse() {
        }
 }
 
-// Extract regular text from the beginning of the pattern.
+// Extract regular text from the beginning of the pattern,
+// possibly after a leading iBOT.
 // That text can be used by doExecute to speed up matching.
 func (re *Regexp) setPrefix() {
        var b []byte
        var utf = make([]byte, utf8.UTFMax)
        var inst *instr
-       // First instruction is start; skip that.
+       // First instruction is start; skip that.  Also skip any initial iBOT.
+       inst = re.inst[0].next
+       for inst.kind == iBOT {
+               inst = inst.next
+       }
 Loop:
-       for inst = re.inst[0].next; inst.kind != iEnd; inst = inst.next {
+       for ; inst.kind != iEnd; inst = inst.next {
                // stop if this is not a char
                if inst.kind != iChar {
                        break
@@ -748,14 +753,30 @@ func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
        if bytestr != nil {
                end = len(bytestr)
        }
+       anchored := re.inst[0].next.kind == iBOT
+       if anchored && pos > 0 {
+               return nil
+       }
        // fast check for initial plain substring
        prefixed := false // has this iteration begun by skipping a prefix?
        if re.prefix != "" {
-               var advance int
-               if bytestr == nil {
-                       advance = strings.Index(str[pos:], re.prefix)
+               advance := 0
+               if anchored {
+                       if bytestr == nil {
+                               if !strings.HasPrefix(str, re.prefix) {
+                                       return nil
+                               }
+                       } else {
+                               if !bytes.HasPrefix(bytestr, re.prefixBytes) {
+                                       return nil
+                               }
+                       }
                } else {
-                       advance = bytes.Index(bytestr[pos:], re.prefixBytes)
+                       if bytestr == nil {
+                               advance = strings.Index(str[pos:], re.prefix)
+                       } else {
+                               advance = bytes.Index(bytestr[pos:], re.prefixBytes)
+                       }
                }
                if advance == -1 {
                        return nil