Cypherpunks repositories - gostls13.git/commitdiff
regexp: add ASCII fast path for context methods
author Martin Möhrmann <moehrmann@google.com>
Sat, 4 Mar 2017 06:18:26 +0000 (07:18 +0100)
committer Brad Fitzpatrick <bradfitz@golang.org>
Thu, 23 Mar 2017 00:08:20 +0000 (00:08 +0000)
The step method implementations check directly if the next rune
only needs one byte to be decoded and avoid calling utf8.DecodeRune
for such ASCII characters.

Introduce the same fast path optimization for rune decoding
for the context methods.

Results for regexp benchmarks that use the context methods:

name                            old time/op  new time/op  delta
AnchoredLiteralShortNonMatch-4  97.5ns ± 1%  94.8ns ± 2%  -2.80%  (p=0.000 n=45+43)
AnchoredShortMatch-4             163ns ± 1%   160ns ± 1%  -1.84%  (p=0.000 n=46+47)
NotOnePassShortA-4               742ns ± 2%   742ns ± 2%    ~     (p=0.440 n=49+50)
NotOnePassShortB-4               535ns ± 1%   533ns ± 2%  -0.37%  (p=0.005 n=46+48)
OnePassLongPrefix-4              169ns ± 2%   166ns ± 2%  -2.06%  (p=0.000 n=50+49)

Change-Id: Ib302d9e8c63333f02695369fcf9963974362e335
Reviewed-on: https://go-review.googlesource.com/38256
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/regexp/regexp.go

index 01093d4bd0d73d76d1cb47116defc5b3e1dbcc5c..4b34d53c8a9eb5e4d21abbcd902f9278628c3e4b 100644 (file)
@@ -313,11 +313,19 @@ func (i *inputString) index(re *Regexp, pos int) int {
 
 func (i *inputString) context(pos int) syntax.EmptyOp {
        r1, r2 := endOfText, endOfText
-       if pos > 0 && pos <= len(i.str) {
-               r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+       // 0 < pos && pos <= len(i.str)
+       if uint(pos-1) < uint(len(i.str)) {
+               r1 = rune(i.str[pos-1])
+               if r1 >= utf8.RuneSelf {
+                       r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+               }
        }
-       if pos < len(i.str) {
-               r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+       // 0 <= pos && pos < len(i.str)
+       if uint(pos) < uint(len(i.str)) {
+               r2 = rune(i.str[pos])
+               if r2 >= utf8.RuneSelf {
+                       r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+               }
        }
        return syntax.EmptyOpContext(r1, r2)
 }
@@ -352,11 +360,19 @@ func (i *inputBytes) index(re *Regexp, pos int) int {
 
 func (i *inputBytes) context(pos int) syntax.EmptyOp {
        r1, r2 := endOfText, endOfText
-       if pos > 0 && pos <= len(i.str) {
-               r1, _ = utf8.DecodeLastRune(i.str[:pos])
+       // 0 < pos && pos <= len(i.str)
+       if uint(pos-1) < uint(len(i.str)) {
+               r1 = rune(i.str[pos-1])
+               if r1 >= utf8.RuneSelf {
+                       r1, _ = utf8.DecodeLastRune(i.str[:pos])
+               }
        }
-       if pos < len(i.str) {
-               r2, _ = utf8.DecodeRune(i.str[pos:])
+       // 0 <= pos && pos < len(i.str)
+       if uint(pos) < uint(len(i.str)) {
+               r2 = rune(i.str[pos])
+               if r2 >= utf8.RuneSelf {
+                       r2, _ = utf8.DecodeRune(i.str[pos:])
+               }
        }
        return syntax.EmptyOpContext(r1, r2)
 }