From e74c6cd3c05fda74fc8cac7a24b22b8b55a2239d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Martin=20M=C3=B6hrmann?= Date: Sat, 4 Mar 2017 07:18:26 +0100 Subject: [PATCH] regexp: add ASCII fast path for context methods MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The step method implementations check directly if the next rune only needs one byte to be decoded and avoid calling utf8.DecodeRune for such ASCII characters. Introduce the same fast path optimization for rune decoding for the context methods. Results for regexp benchmarks that use the context methods: name old time/op new time/op delta AnchoredLiteralShortNonMatch-4 97.5ns ± 1% 94.8ns ± 2% -2.80% (p=0.000 n=45+43) AnchoredShortMatch-4 163ns ± 1% 160ns ± 1% -1.84% (p=0.000 n=46+47) NotOnePassShortA-4 742ns ± 2% 742ns ± 2% ~ (p=0.440 n=49+50) NotOnePassShortB-4 535ns ± 1% 533ns ± 2% -0.37% (p=0.005 n=46+48) OnePassLongPrefix-4 169ns ± 2% 166ns ± 2% -2.06% (p=0.000 n=50+49) Change-Id: Ib302d9e8c63333f02695369fcf9963974362e335 Reviewed-on: https://go-review.googlesource.com/38256 Reviewed-by: Brad Fitzpatrick Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot --- src/regexp/regexp.go | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/regexp/regexp.go b/src/regexp/regexp.go index 01093d4bd0..4b34d53c8a 100644 --- a/src/regexp/regexp.go +++ b/src/regexp/regexp.go @@ -313,11 +313,19 @@ func (i *inputString) index(re *Regexp, pos int) int { func (i *inputString) context(pos int) syntax.EmptyOp { r1, r2 := endOfText, endOfText - if pos > 0 && pos <= len(i.str) { - r1, _ = utf8.DecodeLastRuneInString(i.str[:pos]) + // 0 < pos && pos <= len(i.str) + if uint(pos-1) < uint(len(i.str)) { + r1 = rune(i.str[pos-1]) + if r1 >= utf8.RuneSelf { + r1, _ = utf8.DecodeLastRuneInString(i.str[:pos]) + } } - if pos < len(i.str) { - r2, _ = utf8.DecodeRuneInString(i.str[pos:]) + // 0 <= pos && pos < len(i.str) + if uint(pos) < uint(len(i.str)) { + r2 = rune(i.str[pos]) + if r2 >= utf8.RuneSelf { + r2, _ = utf8.DecodeRuneInString(i.str[pos:]) + } } return syntax.EmptyOpContext(r1, r2) } @@ -352,11 +360,19 @@ func (i *inputBytes) index(re *Regexp, pos int) int { func (i *inputBytes) context(pos int) syntax.EmptyOp { r1, r2 := endOfText, endOfText - if pos > 0 && pos <= len(i.str) { - r1, _ = utf8.DecodeLastRune(i.str[:pos]) + // 0 < pos && pos <= len(i.str) + if uint(pos-1) < uint(len(i.str)) { + r1 = rune(i.str[pos-1]) + if r1 >= utf8.RuneSelf { + r1, _ = utf8.DecodeLastRune(i.str[:pos]) + } } - if pos < len(i.str) { - r2, _ = utf8.DecodeRune(i.str[pos:]) + // 0 <= pos && pos < len(i.str) + if uint(pos) < uint(len(i.str)) { + r2 = rune(i.str[pos]) + if r2 >= utf8.RuneSelf { + r2, _ = utf8.DecodeRune(i.str[pos:]) + } } return syntax.EmptyOpContext(r1, r2) } -- 2.48.1