]> Cypherpunks repositories - gostls13.git/commitdiff
strings, bytes: optimize function Index
authorerifan01 <eric.fang@arm.com>
Thu, 19 Jul 2018 09:55:17 +0000 (09:55 +0000)
committerIan Lance Taylor <iant@golang.org>
Thu, 13 Sep 2018 16:21:55 +0000 (16:21 +0000)
This change compares the first two characters instead of the first one,
and if they match, the entire string is compared. Comparing the first two
characters helps to filter out the case where the first character matches
but the subsequent characters do not match, thereby improving the substring
search speed in this case.

Benchmarks with no effect or minimal impact (less than 5%) is not listed,
the following are improved benchmarks:
On arm64:
strings:
IndexPeriodic/IndexPeriodic16-8   172890.00ns +- 2%   124156.20ns +- 0%  -28.19%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic32-8    78092.80ns +- 0%    65138.60ns +- 0%  -16.59%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic64-8    42322.20ns +- 0%    34661.60ns +- 0%  -18.10%  (p=0.008 n=5+5)
bytes:
IndexPeriodic/IndexPeriodic16-8     183468.20ns +- 6%     123759.00ns +- 0%  -32.54%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic32-8      84776.40ns +- 0%      63907.80ns +- 0%  -24.62%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic64-8      45835.60ns +- 0%      34194.20ns +- 0%  -25.40%  (p=0.008 n=5+5)

On amd64:
strings:
IndexPeriodic/IndexPeriodic8-16    219499.00ns +- 0%   178123.40ns +- 0%  -18.85%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic16-16   109760.20ns +- 0%    88957.80ns +- 0%  -18.95%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic32-16    54943.00ns +- 0%    44573.80ns +- 0%  -18.87%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic64-16    29804.80ns +- 0%    24417.80ns +- 0%  -18.07%  (p=0.008 n=5+5)
bytes:
IndexPeriodic/IndexPeriodic8-16     226592.60ns +- 0%    181183.20ns +- 0%  -20.04%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic16-16    111432.60ns +- 0%     90634.60ns +- 0%  -18.66%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic32-16     55640.60ns +- 0%     45433.00ns +- 0%  -18.35%  (p=0.008 n=5+5)
IndexPeriodic/IndexPeriodic64-16     30833.00ns +- 0%     24784.20ns +- 0%  -19.62%  (p=0.008 n=5+5)

Change-Id: I2d9e7e138d29e960d20a203eb74dc2ec976a9d71
Reviewed-on: https://go-review.googlesource.com/131177
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
src/bytes/bytes.go
src/strings/strings.go

index 77a7ce98e077ce2ca240a76a5fc5389b788d459e..876fa3c1edda659d6960acd942b05f0f81db4365 100644 (file)
@@ -849,21 +849,22 @@ func Index(s, sep []byte) int {
                if len(s) <= bytealg.MaxBruteForce {
                        return bytealg.Index(s, sep)
                }
-               c := sep[0]
+               c0 := sep[0]
+               c1 := sep[1]
                i := 0
-               t := s[:len(s)-n+1]
+               t := len(s) - n + 1
                fails := 0
-               for i < len(t) {
-                       if t[i] != c {
+               for i < t {
+                       if s[i] != c0 {
                                // IndexByte is faster than bytealg.Index, so use it as long as
                                // we're not getting lots of false positives.
-                               o := IndexByte(t[i:], c)
+                               o := IndexByte(s[i:t], c0)
                                if o < 0 {
                                        return -1
                                }
                                i += o
                        }
-                       if Equal(s[i:i+n], sep) {
+                       if s[i+1] == c1 && Equal(s[i:i+n], sep) {
                                return i
                        }
                        fails++
@@ -879,24 +880,25 @@ func Index(s, sep []byte) int {
                }
                return -1
        }
-       c := sep[0]
+       c0 := sep[0]
+       c1 := sep[1]
        i := 0
        fails := 0
-       t := s[:len(s)-n+1]
-       for i < len(t) {
-               if t[i] != c {
-                       o := IndexByte(t[i:], c)
+       t := len(s) - n + 1
+       for i < t {
+               if s[i] != c0 {
+                       o := IndexByte(s[i:t], c0)
                        if o < 0 {
                                break
                        }
                        i += o
                }
-               if Equal(s[i:i+n], sep) {
+               if s[i+1] == c1 && Equal(s[i:i+n], sep) {
                        return i
                }
                i++
                fails++
-               if fails >= 4+i>>4 && i < len(t) {
+               if fails >= 4+i>>4 && i < t {
                        // Give up on IndexByte, it isn't skipping ahead
                        // far enough to be better than Rabin-Karp.
                        // Experiments (using IndexPeriodic) suggest
index df95715ec859a0577209aa2d6dc34acebb71e93f..26aceda2121b1d94056ed0befb5fb8df0c8403be 100644 (file)
@@ -947,21 +947,22 @@ func Index(s, substr string) int {
                if len(s) <= bytealg.MaxBruteForce {
                        return bytealg.IndexString(s, substr)
                }
-               c := substr[0]
+               c0 := substr[0]
+               c1 := substr[1]
                i := 0
-               t := s[:len(s)-n+1]
+               t := len(s) - n + 1
                fails := 0
-               for i < len(t) {
-                       if t[i] != c {
+               for i < t {
+                       if s[i] != c0 {
                                // IndexByte is faster than bytealg.IndexString, so use it as long as
                                // we're not getting lots of false positives.
-                               o := IndexByte(t[i:], c)
+                               o := IndexByte(s[i:t], c0)
                                if o < 0 {
                                        return -1
                                }
                                i += o
                        }
-                       if s[i:i+n] == substr {
+                       if s[i+1] == c1 && s[i:i+n] == substr {
                                return i
                        }
                        fails++
@@ -977,24 +978,25 @@ func Index(s, substr string) int {
                }
                return -1
        }
-       c := substr[0]
+       c0 := substr[0]
+       c1 := substr[1]
        i := 0
-       t := s[:len(s)-n+1]
+       t := len(s) - n + 1
        fails := 0
-       for i < len(t) {
-               if t[i] != c {
-                       o := IndexByte(t[i:], c)
+       for i < t {
+               if s[i] != c0 {
+                       o := IndexByte(s[i:t], c0)
                        if o < 0 {
                                return -1
                        }
                        i += o
                }
-               if s[i:i+n] == substr {
+               if s[i+1] == c1 && s[i:i+n] == substr {
                        return i
                }
                i++
                fails++
-               if fails >= 4+i>>4 && i < len(t) {
+               if fails >= 4+i>>4 && i < t {
                        // See comment in ../bytes/bytes_generic.go.
                        j := indexRabinKarp(s[i:], substr)
                        if j < 0 {