]> Cypherpunks repositories - gostls13.git/commitdiff
regexp: fix one-pass compilation
authorCaleb Spare <cespare@gmail.com>
Tue, 24 Nov 2015 00:16:42 +0000 (16:16 -0800)
committerRuss Cox <rsc@golang.org>
Wed, 25 Nov 2015 17:44:17 +0000 (17:44 +0000)
The one-pass transformation is structured as a search over the input
machine for conditions that violate the one-pass requisites. At each
iteration, we should fully explore all non-input paths that proceed from
the current instruction; this is implemented via recursive check calls.
But when we reach instructions that demand input (InstRune*), these
should be put onto the search queue.

Instead of searching this way, the routine previously (effectively)
proceeded through the machine one instruction at a time until finding an
Inst{Match,Fail,Rune*}, calling check on each instruction. This caused
bug #11905, where the transformation stopped before rewriting all
InstAlts as InstAltMatches.

Further, the check function unnecessarily recurred on InstRune*
instructions. (I believe this helps to mask the above bug.)

This change also deletes some unused functions and duplicate test cases.

Fixes #11905.

Change-Id: I5b0b26efea3d3bd01c7479a518b5ed1b886701cd
Reviewed-on: https://go-review.googlesource.com/17195
Reviewed-by: Russ Cox <rsc@golang.org>
src/regexp/onepass.go
src/regexp/onepass_test.go

index 2bd81e32335598001fd9ec614fbe630fe851ba7c..2ce3902388b292e411deb3b2e5b3cf19076cfb6c 100644 (file)
@@ -113,10 +113,6 @@ func (q *queueOnePass) clear() {
        q.nextIndex = 0
 }
 
-func (q *queueOnePass) reset() {
-       q.nextIndex = 0
-}
-
 func (q *queueOnePass) contains(u uint32) bool {
        if u >= uint32(len(q.sparse)) {
                return false
@@ -313,25 +309,9 @@ func makeOnePass(p *onePassProg) *onePassProg {
        var (
                instQueue    = newQueue(len(p.Inst))
                visitQueue   = newQueue(len(p.Inst))
-               build        func(uint32, *queueOnePass)
                check        func(uint32, map[uint32]bool) bool
                onePassRunes = make([][]rune, len(p.Inst))
        )
-       build = func(pc uint32, q *queueOnePass) {
-               if q.contains(pc) {
-                       return
-               }
-               inst := p.Inst[pc]
-               switch inst.Op {
-               case syntax.InstAlt, syntax.InstAltMatch:
-                       q.insert(inst.Out)
-                       build(inst.Out, q)
-                       q.insert(inst.Arg)
-               case syntax.InstMatch, syntax.InstFail:
-               default:
-                       q.insert(inst.Out)
-               }
-       }
 
        // check that paths from Alt instructions are unambiguous, and rebuild the new
        // program as a onepass program
@@ -390,11 +370,11 @@ func makeOnePass(p *onePassProg) *onePassProg {
                        m[pc] = inst.Op == syntax.InstMatch
                        break
                case syntax.InstRune:
-                       ok = check(inst.Out, m)
                        m[pc] = false
                        if len(inst.Next) > 0 {
                                break
                        }
+                       instQueue.insert(inst.Out)
                        if len(inst.Rune) == 0 {
                                onePassRunes[pc] = []rune{}
                                inst.Next = []uint32{inst.Out}
@@ -418,11 +398,11 @@ func makeOnePass(p *onePassProg) *onePassProg {
                        }
                        inst.Op = syntax.InstRune
                case syntax.InstRune1:
-                       ok = check(inst.Out, m)
                        m[pc] = false
                        if len(inst.Next) > 0 {
                                break
                        }
+                       instQueue.insert(inst.Out)
                        runes := []rune{}
                        // expand case-folded runes
                        if syntax.Flags(inst.Arg)&syntax.FoldCase != 0 {
@@ -442,19 +422,19 @@ func makeOnePass(p *onePassProg) *onePassProg {
                        }
                        inst.Op = syntax.InstRune
                case syntax.InstRuneAny:
-                       ok = check(inst.Out, m)
                        m[pc] = false
                        if len(inst.Next) > 0 {
                                break
                        }
+                       instQueue.insert(inst.Out)
                        onePassRunes[pc] = append([]rune{}, anyRune...)
                        inst.Next = []uint32{inst.Out}
                case syntax.InstRuneAnyNotNL:
-                       ok = check(inst.Out, m)
                        m[pc] = false
                        if len(inst.Next) > 0 {
                                break
                        }
+                       instQueue.insert(inst.Out)
                        onePassRunes[pc] = append([]rune{}, anyRuneNotNL...)
                        inst.Next = []uint32{}
                        for i := len(onePassRunes[pc]) / 2; i >= 0; i-- {
@@ -468,24 +448,12 @@ func makeOnePass(p *onePassProg) *onePassProg {
        instQueue.insert(uint32(p.Start))
        m := make(map[uint32]bool, len(p.Inst))
        for !instQueue.empty() {
-               pc := instQueue.next()
-               inst := p.Inst[pc]
                visitQueue.clear()
+               pc := instQueue.next()
                if !check(uint32(pc), m) {
                        p = notOnePass
                        break
                }
-               switch inst.Op {
-               case syntax.InstAlt, syntax.InstAltMatch:
-                       instQueue.insert(inst.Out)
-                       instQueue.insert(inst.Arg)
-               case syntax.InstCapture, syntax.InstEmptyWidth, syntax.InstNop:
-                       instQueue.insert(inst.Out)
-               case syntax.InstMatch:
-               case syntax.InstFail:
-               case syntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:
-               default:
-               }
        }
        if p != notOnePass {
                for i := range p.Inst {
@@ -495,47 +463,6 @@ func makeOnePass(p *onePassProg) *onePassProg {
        return p
 }
 
-// walk visits each Inst in the prog once, and applies the argument
-// function(ip, next), in pre-order.
-func walk(prog *syntax.Prog, funcs ...func(ip, next uint32)) {
-       var walk1 func(uint32)
-       progQueue := newQueue(len(prog.Inst))
-       walk1 = func(ip uint32) {
-               if progQueue.contains(ip) {
-                       return
-               }
-               progQueue.insert(ip)
-               inst := prog.Inst[ip]
-               switch inst.Op {
-               case syntax.InstAlt, syntax.InstAltMatch:
-                       for _, f := range funcs {
-                               f(ip, inst.Out)
-                               f(ip, inst.Arg)
-                       }
-                       walk1(inst.Out)
-                       walk1(inst.Arg)
-               default:
-                       for _, f := range funcs {
-                               f(ip, inst.Out)
-                       }
-                       walk1(inst.Out)
-               }
-       }
-       walk1(uint32(prog.Start))
-}
-
-// find returns the Insts that match the argument predicate function
-func find(prog *syntax.Prog, f func(*syntax.Prog, int) bool) (matches []uint32) {
-       matches = []uint32{}
-
-       for ip := range prog.Inst {
-               if f(prog, ip) {
-                       matches = append(matches, uint32(ip))
-               }
-       }
-       return
-}
-
 var notOnePass *onePassProg = nil
 
 // compileOnePass returns a new *syntax.Prog suitable for onePass execution if the original Prog
index 7b2beea67ffb2a60f8c8e7debc358a3dea4ca500..8202ebefa59348dd5fb31e1732543e952468b8bd 100644 (file)
@@ -140,47 +140,41 @@ var onePass = &onePassProg{}
 var onePassTests = []struct {
        re      string
        onePass *onePassProg
-       prog    string
 }{
-       {`^(?:a|(?:a*))$`, notOnePass, noStr},
-       {`^(?:(a)|(?:a*))$`, notOnePass, noStr},
-       {`^(?:(?:(?:.(?:$))?))$`, onePass, `a`},
-       {`^abcd$`, onePass, `abcd`},
-       {`^abcd$`, onePass, `abcde`},
-       {`^(?:(?:a{0,})*?)$`, onePass, `a`},
-       {`^(?:(?:a+)*)$`, onePass, ``},
-       {`^(?:(?:a|(?:aa)))$`, onePass, ``},
-       {`^(?:[^\s\S])$`, onePass, ``},
-       {`^(?:(?:a{3,4}){0,})$`, notOnePass, `aaaaaa`},
-       {`^(?:(?:a+)*)$`, onePass, `a`},
-       {`^(?:(?:(?:a*)+))$`, onePass, noStr},
-       {`^(?:(?:a+)*)$`, onePass, ``},
-       {`^[a-c]+$`, onePass, `abc`},
-       {`^[a-c]*$`, onePass, `abcdabc`},
-       {`^(?:a*)$`, onePass, `aaaaaaa`},
-       {`^(?:(?:aa)|a)$`, onePass, `a`},
-       {`^[a-c]*`, notOnePass, `abcdabc`},
-       {`^[a-c]*$`, onePass, `abc`},
-       {`^...$`, onePass, ``},
-       {`^(?:a|(?:aa))$`, onePass, `a`},
-       {`^[a-c]*`, notOnePass, `abcabc`},
-       {`^a((b))c$`, onePass, noStr},
-       {`^a.[l-nA-Cg-j]?e$`, onePass, noStr},
-       {`^a((b))$`, onePass, noStr},
-       {`^a(?:(b)|(c))c$`, onePass, noStr},
-       {`^a(?:(b*)|(c))c$`, notOnePass, noStr},
-       {`^a(?:b|c)$`, onePass, noStr},
-       {`^a(?:b?|c)$`, onePass, noStr},
-       {`^a(?:b?|c?)$`, notOnePass, noStr},
-       {`^a(?:b?|c+)$`, onePass, noStr},
-       {`^a(?:b+|(bc))d$`, notOnePass, noStr},
-       {`^a(?:bc)+$`, onePass, noStr},
-       {`^a(?:[bcd])+$`, onePass, noStr},
-       {`^a((?:[bcd])+)$`, onePass, noStr},
-       {`^a(:?b|c)*d$`, onePass, `abbbccbbcbbd"`},
-       {`^.bc(d|e)*$`, onePass, `abcddddddeeeededd`},
-       {`^(?:(?:aa)|.)$`, notOnePass, `a`},
-       {`^(?:(?:a{1,2}){1,2})$`, notOnePass, `aaaa`},
+       {`^(?:a|(?:a*))$`, notOnePass},
+       {`^(?:(a)|(?:a*))$`, notOnePass},
+       {`^(?:(?:(?:.(?:$))?))$`, onePass},
+       {`^abcd$`, onePass},
+       {`^(?:(?:a{0,})*?)$`, onePass},
+       {`^(?:(?:a+)*)$`, onePass},
+       {`^(?:(?:a|(?:aa)))$`, onePass},
+       {`^(?:[^\s\S])$`, onePass},
+       {`^(?:(?:a{3,4}){0,})$`, notOnePass},
+       {`^(?:(?:(?:a*)+))$`, onePass},
+       {`^[a-c]+$`, onePass},
+       {`^[a-c]*$`, onePass},
+       {`^(?:a*)$`, onePass},
+       {`^(?:(?:aa)|a)$`, onePass},
+       {`^[a-c]*`, notOnePass},
+       {`^...$`, onePass},
+       {`^(?:a|(?:aa))$`, onePass},
+       {`^a((b))c$`, onePass},
+       {`^a.[l-nA-Cg-j]?e$`, onePass},
+       {`^a((b))$`, onePass},
+       {`^a(?:(b)|(c))c$`, onePass},
+       {`^a(?:(b*)|(c))c$`, notOnePass},
+       {`^a(?:b|c)$`, onePass},
+       {`^a(?:b?|c)$`, onePass},
+       {`^a(?:b?|c?)$`, notOnePass},
+       {`^a(?:b?|c+)$`, onePass},
+       {`^a(?:b+|(bc))d$`, notOnePass},
+       {`^a(?:bc)+$`, onePass},
+       {`^a(?:[bcd])+$`, onePass},
+       {`^a((?:[bcd])+)$`, onePass},
+       {`^a(:?b|c)*d$`, onePass},
+       {`^.bc(d|e)*$`, onePass},
+       {`^(?:(?:aa)|.)$`, notOnePass},
+       {`^(?:(?:a{1,2}){1,2})$`, notOnePass},
 }
 
 func TestCompileOnePass(t *testing.T) {
@@ -206,3 +200,28 @@ func TestCompileOnePass(t *testing.T) {
                }
        }
 }
+
+// TODO(cespare): Unify with onePassTests and rationalize one-pass test cases.
+var onePassTests1 = []struct {
+       re    string
+       match string
+}{
+       {`^a(/b+(#c+)*)*$`, "a/b#c"}, // golang.org/issue/11905
+}
+
+func TestRunOnePass(t *testing.T) {
+       for _, test := range onePassTests1 {
+               re, err := Compile(test.re)
+               if err != nil {
+                       t.Errorf("Compile(%q): got err: %s", test.re, err)
+                       continue
+               }
+               if re.onepass == notOnePass {
+                       t.Errorf("Compile(%q): got notOnePass, want one-pass", test.re)
+                       continue
+               }
+               if !re.MatchString(test.match) {
+                       t.Errorf("onepass %q did not match %q", test.re, test.match)
+               }
+       }
+}