From: Martin Möhrmann
Date: Sat, 4 Mar 2017 06:18:26 +0000 (+0100)
Subject: regexp: add ASCII fast path for context methods
X-Git-Tag: go1.9beta1~1026
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=e74c6cd3c05fda74fc8cac7a24b22b8b55a2239d;p=gostls13.git

regexp: add ASCII fast path for context methods

The step method implementations check directly if the next rune
only needs one byte to be decoded and avoid calling utf8.DecodeRune
for such ASCII characters.

Introduce the same fast path optimization for rune decoding
for the context methods.

Results for regexp benchmarks that use the context methods:

name                            old time/op  new time/op  delta
AnchoredLiteralShortNonMatch-4  97.5ns ± 1%  94.8ns ± 2%  -2.80%  (p=0.000 n=45+43)
AnchoredShortMatch-4             163ns ± 1%   160ns ± 1%  -1.84%  (p=0.000 n=46+47)
NotOnePassShortA-4               742ns ± 2%   742ns ± 2%    ~     (p=0.440 n=49+50)
NotOnePassShortB-4               535ns ± 1%   533ns ± 2%  -0.37%  (p=0.005 n=46+48)
OnePassLongPrefix-4              169ns ± 2%   166ns ± 2%  -2.06%  (p=0.000 n=50+49)

Change-Id: Ib302d9e8c63333f02695369fcf9963974362e335
Reviewed-on: https://go-review.googlesource.com/38256
Reviewed-by: Brad Fitzpatrick
Run-TryBot: Brad Fitzpatrick
TryBot-Result: Gobot Gobot
---

diff --git a/src/regexp/regexp.go b/src/regexp/regexp.go
index 01093d4bd0..4b34d53c8a 100644
--- a/src/regexp/regexp.go
+++ b/src/regexp/regexp.go
@@ -313,11 +313,19 @@ func (i *inputString) index(re *Regexp, pos int) int {
 
 func (i *inputString) context(pos int) syntax.EmptyOp {
 	r1, r2 := endOfText, endOfText
-	if pos > 0 && pos <= len(i.str) {
-		r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+	// 0 < pos && pos <= len(i.str)
+	if uint(pos-1) < uint(len(i.str)) {
+		r1 = rune(i.str[pos-1])
+		if r1 >= utf8.RuneSelf {
+			r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
+		}
 	}
-	if pos < len(i.str) {
-		r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+	// 0 <= pos && pos < len(i.str)
+	if uint(pos) < uint(len(i.str)) {
+		r2 = rune(i.str[pos])
+		if r2 >= utf8.RuneSelf {
+			r2, _ = utf8.DecodeRuneInString(i.str[pos:])
+		}
 	}
 	return syntax.EmptyOpContext(r1, r2)
 }
@@ -352,11 +360,19 @@ func (i *inputBytes) index(re *Regexp, pos int) int {
 
 func (i *inputBytes) context(pos int) syntax.EmptyOp {
 	r1, r2 := endOfText, endOfText
-	if pos > 0 && pos <= len(i.str) {
-		r1, _ = utf8.DecodeLastRune(i.str[:pos])
+	// 0 < pos && pos <= len(i.str)
+	if uint(pos-1) < uint(len(i.str)) {
+		r1 = rune(i.str[pos-1])
+		if r1 >= utf8.RuneSelf {
+			r1, _ = utf8.DecodeLastRune(i.str[:pos])
+		}
 	}
-	if pos < len(i.str) {
-		r2, _ = utf8.DecodeRune(i.str[pos:])
+	// 0 <= pos && pos < len(i.str)
+	if uint(pos) < uint(len(i.str)) {
+		r2 = rune(i.str[pos])
+		if r2 >= utf8.RuneSelf {
+			r2, _ = utf8.DecodeRune(i.str[pos:])
+		}
 	}
 	return syntax.EmptyOpContext(r1, r2)
 }