From: Andy Balholm Date: Mon, 15 Jun 2020 22:54:49 +0000 (-0700) Subject: regexp/syntax: append patchLists in constant time X-Git-Tag: go1.15rc1~89 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=0ce1ffc49d3935a41e1fff2aec224c254a9fad1d;p=gostls13.git regexp/syntax: append patchLists in constant time By keeping a tail pointer, we can append to a patchList in constant time, rather than in time proportional to the length of the list. This gets rid of the quadratic compile times we were seeing for long series of alternations. This is basically the same change as https://github.com/google/re2/commit/e9d517989f66f2e0a24cde42f4d2424dd3e4a9b9. Fixes #39542. Change-Id: Ib4ca0ca9c55abd1594df1984653c7d311ccf7572 Reviewed-on: https://go-review.googlesource.com/c/go/+/238079 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- diff --git a/src/regexp/syntax/compile.go b/src/regexp/syntax/compile.go index 1d8ab87a6d..7524d628fe 100644 --- a/src/regexp/syntax/compile.go +++ b/src/regexp/syntax/compile.go @@ -12,57 +12,47 @@ import "unicode" // See https://swtch.com/~rsc/regexp/regexp1.html for inspiration. // // These aren't really pointers: they're integers, so we can reinterpret them -// this way without using package unsafe. A value l denotes -// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1). -// l == 0 denotes the empty list, okay because we start every program +// this way without using package unsafe. A value l.head denotes +// p.inst[l.head>>1].Out (l.head&1==0) or .Arg (l.head&1==1). +// head == 0 denotes the empty list, okay because we start every program // with a fail instruction, so we'll never want to point at its output link. -type patchList uint32 +type patchList struct { + head, tail uint32 +} -func (l patchList) next(p *Prog) patchList { - i := &p.Inst[l>>1] - if l&1 == 0 { - return patchList(i.Out) - } - return patchList(i.Arg) +func makePatchList(n uint32) patchList { + return patchList{n, n} } func (l patchList) patch(p *Prog, val uint32) { - for l != 0 { - i := &p.Inst[l>>1] - if l&1 == 0 { - l = patchList(i.Out) + head := l.head + for head != 0 { + i := &p.Inst[head>>1] + if head&1 == 0 { + head = i.Out i.Out = val } else { - l = patchList(i.Arg) + head = i.Arg i.Arg = val } } } func (l1 patchList) append(p *Prog, l2 patchList) patchList { - if l1 == 0 { + if l1.head == 0 { return l2 } - if l2 == 0 { + if l2.head == 0 { return l1 } - last := l1 - for { - next := last.next(p) - if next == 0 { - break - } - last = next - } - - i := &p.Inst[last>>1] - if last&1 == 0 { - i.Out = uint32(l2) + i := &p.Inst[l1.tail>>1] + if l1.tail&1 == 0 { + i.Out = l2.head } else { - i.Arg = uint32(l2) + i.Arg = l2.head } - return l1 + return patchList{l1.head, l2.tail} } // A frag represents a compiled program fragment. @@ -176,7 +166,7 @@ func (c *compiler) inst(op InstOp) frag { func (c *compiler) nop() frag { f := c.inst(InstNop) - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) return f } @@ -186,7 +176,7 @@ func (c *compiler) fail() frag { func (c *compiler) cap(arg uint32) frag { f := c.inst(InstCapture) - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) c.p.Inst[f.i].Arg = arg if c.p.NumCap < int(arg)+1 { @@ -229,10 +219,10 @@ func (c *compiler) quest(f1 frag, nongreedy bool) frag { i := &c.p.Inst[f.i] if nongreedy { i.Arg = f1.i - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) } else { i.Out = f1.i - f.out = patchList(f.i<<1 | 1) + f.out = makePatchList(f.i<<1 | 1) } f.out = f.out.append(c.p, f1.out) return f @@ -243,10 +233,10 @@ func (c *compiler) star(f1 frag, nongreedy bool) frag { i := &c.p.Inst[f.i] if nongreedy { i.Arg = f1.i - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) } else { i.Out = f1.i - f.out = patchList(f.i<<1 | 1) + f.out = makePatchList(f.i<<1 | 1) } f1.out.patch(c.p, f.i) return f @@ -259,7 +249,7 @@ func (c *compiler) plus(f1 frag, nongreedy bool) frag { func (c *compiler) empty(op EmptyOp) frag { f := c.inst(InstEmptyWidth) c.p.Inst[f.i].Arg = uint32(op) - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) return f } @@ -273,7 +263,7 @@ func (c *compiler) rune(r []rune, flags Flags) frag { flags &^= FoldCase } i.Arg = uint32(flags) - f.out = patchList(f.i << 1) + f.out = makePatchList(f.i << 1) // Special cases for exec machine. switch {