]> Cypherpunks repositories - gostls13.git/commitdiff
regexp: allow patterns with no alternates to be one-pass
authorBryan Boreham <bjboreham@gmail.com>
Mon, 27 May 2024 16:55:02 +0000 (16:55 +0000)
committerGopher Robot <gobot@golang.org>
Wed, 24 Jul 2024 01:01:48 +0000 (01:01 +0000)
Check whether a regex has any 'alt' instructions before rejecting it as one-pass.
Previously `^abc` would run the backtrack matcher.

I tried to make the comment match what the code does now.

Updates #21463

```
name                            old time/op    new time/op    delta
Find-8                             167ns ± 1%     170ns ± 3%     ~     (p=0.500 n=5+5)
FindAllNoMatches-8                88.8ns ± 5%    87.3ns ± 0%     ~     (p=0.095 n=5+5)
FindString-8                       166ns ± 3%     164ns ± 0%     ~     (p=0.063 n=5+5)
FindSubmatch-8                     191ns ± 1%     191ns ± 0%     ~     (p=0.556 n=4+5)
FindStringSubmatch-8               183ns ± 0%     182ns ± 0%   -0.43%  (p=0.048 n=5+5)
Literal-8                         50.3ns ± 0%    50.1ns ± 0%   -0.40%  (p=0.016 n=5+4)
NotLiteral-8                       914ns ± 0%     927ns ± 7%     ~     (p=0.730 n=5+5)
MatchClass-8                      1.20µs ± 1%    1.22µs ± 6%     ~     (p=0.738 n=5+5)
MatchClass_InRange-8              1.20µs ± 6%    1.21µs ± 6%     ~     (p=0.548 n=5+5)
ReplaceAll-8                       796ns ± 0%     792ns ± 0%   -0.51%  (p=0.032 n=5+5)
AnchoredLiteralShortNonMatch-8    41.0ns ± 2%    34.2ns ± 2%  -16.47%  (p=0.008 n=5+5)
AnchoredLiteralLongNonMatch-8     53.3ns ± 0%    34.3ns ± 3%  -35.74%  (p=0.008 n=5+5)
AnchoredShortMatch-8              74.0ns ± 2%    75.8ns ± 0%   +2.46%  (p=0.032 n=5+4)
AnchoredLongMatch-8                146ns ± 3%      76ns ± 1%  -48.12%  (p=0.008 n=5+5)
OnePassShortA-8                    424ns ± 0%     423ns ± 0%     ~     (p=0.222 n=5+4)
NotOnePassShortA-8                 373ns ± 1%     375ns ± 2%     ~     (p=0.690 n=5+5)
OnePassShortB-8                    315ns ± 2%     308ns ± 0%   -2.12%  (p=0.008 n=5+5)
NotOnePassShortB-8                 244ns ± 3%     239ns ± 0%     ~     (p=0.476 n=5+5)
OnePassLongPrefix-8               61.6ns ± 2%    60.9ns ± 0%   -1.13%  (p=0.016 n=5+4)
OnePassLongNotPrefix-8             236ns ± 3%     230ns ± 0%     ~     (p=0.143 n=5+5)
```

Change-Id: I8a94b53bc761cd7ec89923c905ec8baaaa58a5fd
GitHub-Last-Rev: e9e0c29b7448c8ab7cb203c1ed58766dc5d91456
GitHub-Pull-Request: golang/go#48748
Reviewed-on: https://go-review.googlesource.com/c/go/+/353711
Reviewed-by: Daniel Martí <mvdan@mvdan.cc>
Reviewed-by: Russ Cox <rsc@golang.org>
Auto-Submit: Ian Lance Taylor <iant@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Auto-Submit: Ian Lance Taylor <iant@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Commit-Queue: Ian Lance Taylor <iant@google.com>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Ian Lance Taylor <iant@google.com>
src/regexp/onepass.go
src/regexp/onepass_test.go

index 53cbd958394120f513e75a0790c65e619b87c139..96e360661b9aa9407c879d008e6e3ad61ddd68ad 100644 (file)
@@ -465,12 +465,20 @@ func compileOnePass(prog *syntax.Prog) (p *onePassProg) {
                syntax.EmptyOp(prog.Inst[prog.Start].Arg)&syntax.EmptyBeginText != syntax.EmptyBeginText {
                return nil
        }
-       // every instruction leading to InstMatch must be EmptyEndText
+       hasAlt := false
+       for _, inst := range prog.Inst {
+               if inst.Op == syntax.InstAlt || inst.Op == syntax.InstAltMatch {
+                       hasAlt = true
+                       break
+               }
+       }
+       // If we have alternates, every instruction leading to InstMatch must be EmptyEndText.
+       // Also, any match on empty text must be $.
        for _, inst := range prog.Inst {
                opOut := prog.Inst[inst.Out].Op
                switch inst.Op {
                default:
-                       if opOut == syntax.InstMatch {
+                       if opOut == syntax.InstMatch && hasAlt {
                                return nil
                        }
                case syntax.InstAlt, syntax.InstAltMatch:
index 3f44dc7b150533de1c8dab01cd603637b48b22ee..70eba1f577a9f852b7c43aaf4bef3cac709c19b9 100644 (file)
@@ -142,6 +142,7 @@ var onePassTests = []struct {
        {`^(?:(a)|(?:a*))$`, false},
        {`^(?:(?:(?:.(?:$))?))$`, true},
        {`^abcd$`, true},
+       {`^abcd`, true},
        {`^(?:(?:a{0,})*?)$`, false},
        {`^(?:(?:a+)*)$`, true},
        {`^(?:(?:a|(?:aa)))$`, true},
@@ -154,6 +155,7 @@ var onePassTests = []struct {
        {`^(?:(?:aa)|a)$`, true},
        {`^[a-c]*`, false},
        {`^...$`, true},
+       {`^...`, true},
        {`^(?:a|(?:aa))$`, true},
        {`^a((b))c$`, true},
        {`^a.[l-nA-Cg-j]?e$`, true},