From: erifan01 Date: Thu, 19 Jul 2018 09:55:17 +0000 (+0000) Subject: strings, bytes: optimize function Index X-Git-Tag: go1.12beta1~1100 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=691f5c34ad7b7d676644dd749bc4ddb5d20b90d0;p=gostls13.git strings, bytes: optimize function Index This change compares the first two characters instead of the first one, and if they match, the entire string is compared. Comparing the first two characters helps to filter out the case where the first character matches but the subsequent characters do not match, thereby improving the substring search speed in this case. Benchmarks with no effect or minimal impact (less than 5%) is not listed, the following are improved benchmarks: On arm64: strings: IndexPeriodic/IndexPeriodic16-8 172890.00ns +- 2% 124156.20ns +- 0% -28.19% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic32-8 78092.80ns +- 0% 65138.60ns +- 0% -16.59% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic64-8 42322.20ns +- 0% 34661.60ns +- 0% -18.10% (p=0.008 n=5+5) bytes: IndexPeriodic/IndexPeriodic16-8 183468.20ns +- 6% 123759.00ns +- 0% -32.54% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic32-8 84776.40ns +- 0% 63907.80ns +- 0% -24.62% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic64-8 45835.60ns +- 0% 34194.20ns +- 0% -25.40% (p=0.008 n=5+5) On amd64: strings: IndexPeriodic/IndexPeriodic8-16 219499.00ns +- 0% 178123.40ns +- 0% -18.85% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic16-16 109760.20ns +- 0% 88957.80ns +- 0% -18.95% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic32-16 54943.00ns +- 0% 44573.80ns +- 0% -18.87% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic64-16 29804.80ns +- 0% 24417.80ns +- 0% -18.07% (p=0.008 n=5+5) bytes: IndexPeriodic/IndexPeriodic8-16 226592.60ns +- 0% 181183.20ns +- 0% -20.04% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic16-16 111432.60ns +- 0% 90634.60ns +- 0% -18.66% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic32-16 55640.60ns +- 0% 45433.00ns +- 0% -18.35% (p=0.008 n=5+5) IndexPeriodic/IndexPeriodic64-16 30833.00ns +- 0% 24784.20ns +- 0% -19.62% (p=0.008 n=5+5) Change-Id: I2d9e7e138d29e960d20a203eb74dc2ec976a9d71 Reviewed-on: https://go-review.googlesource.com/131177 Run-TryBot: Ian Lance Taylor TryBot-Result: Gobot Gobot Reviewed-by: Ian Lance Taylor --- diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go index 77a7ce98e0..876fa3c1ed 100644 --- a/src/bytes/bytes.go +++ b/src/bytes/bytes.go @@ -849,21 +849,22 @@ func Index(s, sep []byte) int { if len(s) <= bytealg.MaxBruteForce { return bytealg.Index(s, sep) } - c := sep[0] + c0 := sep[0] + c1 := sep[1] i := 0 - t := s[:len(s)-n+1] + t := len(s) - n + 1 fails := 0 - for i < len(t) { - if t[i] != c { + for i < t { + if s[i] != c0 { // IndexByte is faster than bytealg.Index, so use it as long as // we're not getting lots of false positives. - o := IndexByte(t[i:], c) + o := IndexByte(s[i:t], c0) if o < 0 { return -1 } i += o } - if Equal(s[i:i+n], sep) { + if s[i+1] == c1 && Equal(s[i:i+n], sep) { return i } fails++ @@ -879,24 +880,25 @@ func Index(s, sep []byte) int { } return -1 } - c := sep[0] + c0 := sep[0] + c1 := sep[1] i := 0 fails := 0 - t := s[:len(s)-n+1] - for i < len(t) { - if t[i] != c { - o := IndexByte(t[i:], c) + t := len(s) - n + 1 + for i < t { + if s[i] != c0 { + o := IndexByte(s[i:t], c0) if o < 0 { break } i += o } - if Equal(s[i:i+n], sep) { + if s[i+1] == c1 && Equal(s[i:i+n], sep) { return i } i++ fails++ - if fails >= 4+i>>4 && i < len(t) { + if fails >= 4+i>>4 && i < t { // Give up on IndexByte, it isn't skipping ahead // far enough to be better than Rabin-Karp. // Experiments (using IndexPeriodic) suggest diff --git a/src/strings/strings.go b/src/strings/strings.go index df95715ec8..26aceda212 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -947,21 +947,22 @@ func Index(s, substr string) int { if len(s) <= bytealg.MaxBruteForce { return bytealg.IndexString(s, substr) } - c := substr[0] + c0 := substr[0] + c1 := substr[1] i := 0 - t := s[:len(s)-n+1] + t := len(s) - n + 1 fails := 0 - for i < len(t) { - if t[i] != c { + for i < t { + if s[i] != c0 { // IndexByte is faster than bytealg.IndexString, so use it as long as // we're not getting lots of false positives. - o := IndexByte(t[i:], c) + o := IndexByte(s[i:t], c0) if o < 0 { return -1 } i += o } - if s[i:i+n] == substr { + if s[i+1] == c1 && s[i:i+n] == substr { return i } fails++ @@ -977,24 +978,25 @@ func Index(s, substr string) int { } return -1 } - c := substr[0] + c0 := substr[0] + c1 := substr[1] i := 0 - t := s[:len(s)-n+1] + t := len(s) - n + 1 fails := 0 - for i < len(t) { - if t[i] != c { - o := IndexByte(t[i:], c) + for i < t { + if s[i] != c0 { + o := IndexByte(s[i:t], c0) if o < 0 { return -1 } i += o } - if s[i:i+n] == substr { + if s[i+1] == c1 && s[i:i+n] == substr { return i } i++ fails++ - if fails >= 4+i>>4 && i < len(t) { + if fails >= 4+i>>4 && i < t { // See comment in ../bytes/bytes_generic.go. j := indexRabinKarp(s[i:], substr) if j < 0 {