bytes,strings: use IndexByte more often in Index on AMD64

author Ilya Tocar <ilya.tocar@intel.com>

Fri, 21 Oct 2016 20:23:48 +0000 (23:23 +0300)

committer Ilya Tocar <ilya.tocar@intel.com>

Tue, 1 Nov 2016 18:30:52 +0000 (18:30 +0000)
author Ilya Tocar <ilya.tocar@intel.com>
Fri, 21 Oct 2016 20:23:48 +0000 (23:23 +0300)
committer Ilya Tocar <ilya.tocar@intel.com>
Tue, 1 Nov 2016 18:30:52 +0000 (18:30 +0000)
diff --git a/src/bytes/bytes_amd64.go b/src/bytes/bytes_amd64.go

index 198962322ad54bd4e701219b4c6a27585c85dc93..9a4e5e375a954c421d30cf88cea725595e72ddb0 100644 (file)
--- a/src/bytes/bytes_amd64.go
+++ b/src/bytes/bytes_amd64.go
@@ -29,8 +29,6 @@ func Index(s, sep []byte) int {
                 return 0
         case n == 1:
                 return IndexByte(s, sep[0])
-       case n <= shortStringLen:
-               return indexShortStr(s, sep)
         case n == len(s):
                 if Equal(sep, s) {
                         return 0
@@ -38,6 +36,42 @@ func Index(s, sep []byte) int {
                 return -1
         case n > len(s):
                 return -1
+       case n <= shortStringLen:
+               // Use brute force when both s and sep are small
+               if len(s) <= 64 {
+                       return indexShortStr(s, sep)
+               }
+               c := sep[0]
+               i := 0
+               t := s[:len(s)-n+1]
+               fails := 0
+               for i < len(t) {
+                       if t[i] != c {
+                               // IndexByte skips 16/32 bytes per iteration,
+                               // so it's faster than indexShortStr.
+                               o := IndexByte(t[i:], c)
+                               if o < 0 {
+                                       return -1
+                               }
+                               i += o
+                       }
+                       if Equal(s[i:i+n], sep) {
+                               return i
+                       }
+                       fails++
+                       i++
+                       // Switch to indexShortStr when IndexByte produces too many false positives.
+                       // Too many means more than 1 error per 8 characters.
+                       // Allow some errors in the beginning.
+                       if fails > (i+16)/8 {
+                               r := indexShortStr(s[i:], sep)
+                               if r >= 0 {
+                                       return r + i
+                               }
+                               return -1
+                       }
+               }
+               return -1
         }
         // Rabin-Karp search
         hashsep, pow := hashStr(sep)
diff --git a/src/strings/strings_amd64.go b/src/strings/strings_amd64.go

index 5e26ee2c97f9e4d12b522f804fe7b5af4371f8a2..23a98d59458e7e69e116b70eb4a32942e46df5a9 100644 (file)
--- a/src/strings/strings_amd64.go
+++ b/src/strings/strings_amd64.go
@@ -29,8 +29,6 @@ func Index(s, sep string) int {
                 return 0
         case n == 1:
                 return IndexByte(s, sep[0])
-       case n <= shortStringLen:
-               return indexShortStr(s, sep)
         case n == len(s):
                 if sep == s {
                         return 0
@@ -38,6 +36,42 @@ func Index(s, sep string) int {
                 return -1
         case n > len(s):
                 return -1
+       case n <= shortStringLen:
+               // Use brute force when both s and sep are small
+               if len(s) <= 64 {
+                       return indexShortStr(s, sep)
+               }
+               c := sep[0]
+               i := 0
+               t := s[:len(s)-n+1]
+               fails := 0
+               for i < len(t) {
+                       if t[i] != c {
+                               // IndexByte skips 16/32 bytes per iteration,
+                               // so it's faster than indexShortStr.
+                               o := IndexByte(t[i:], c)
+                               if o < 0 {
+                                       return -1
+                               }
+                               i += o
+                       }
+                       if s[i:i+n] == sep {
+                               return i
+                       }
+                       fails++
+                       i++
+                       // Switch to indexShortStr when IndexByte produces too many false positives.
+                       // Too many means more than 1 error per 8 characters.
+                       // Allow some errors in the beginning.
+                       if fails > (i+16)/8 {
+                               r := indexShortStr(s[i:], sep)
+                               if r >= 0 {
+                                       return r + i
+                               }
+                               return -1
+                       }
+               }
+               return -1
         }
         // Rabin-Karp search
         hashsep, pow := hashStr(sep)
author	Ilya Tocar <ilya.tocar@intel.com>
	Fri, 21 Oct 2016 20:23:48 +0000 (23:23 +0300)
committer	Ilya Tocar <ilya.tocar@intel.com>
	Tue, 1 Nov 2016 18:30:52 +0000 (18:30 +0000)
src/bytes/bytes_amd64.go		patch \| blob \| history
src/strings/strings_amd64.go		patch \| blob \| history