]> Cypherpunks repositories - gostls13.git/commitdiff
bytes: Use the same algorithm as strings for Index
authorIlya Tocar <ilya.tocar@intel.com>
Thu, 28 Apr 2016 14:34:24 +0000 (17:34 +0300)
committerIlya Tocar <ilya.tocar@intel.com>
Thu, 1 Sep 2016 18:05:50 +0000 (18:05 +0000)
name                     old time/op    new time/op      delta
IndexByte32-48             9.05ns ± 7%      9.59ns ±11%     +5.93%  (p=0.001 n=19+20)
IndexByte4K-48              118ns ± 4%       122ns ± 8%     +3.52%  (p=0.002 n=19+19)
IndexByte4M-48              172µs ±13%       188µs ±12%     +9.49%  (p=0.000 n=20+20)
IndexByte64M-48            8.00ms ±14%      8.05ms ±23%       ~     (p=0.799 n=20+20)
IndexBytePortable32-48     41.7ns ±15%      42.5ns ±12%       ~     (p=0.372 n=20+20)
IndexBytePortable4K-48     3.08µs ±16%      3.26µs ±10%     +5.77%  (p=0.018 n=20+20)
IndexBytePortable4M-48     3.12ms ±17%      3.20ms ±10%       ~     (p=0.157 n=20+20)
IndexBytePortable64M-48    54.0ms ±14%      55.3ms ±14%       ~     (p=0.640 n=20+20)
Index32-48                  230ns ±12%        46ns ± 6%    -79.87%  (p=0.000 n=20+19)
Index4K-48                 43.2µs ± 9%       3.2µs ±12%    -92.58%  (p=0.000 n=19+20)
Index4M-48                 44.4ms ± 7%       3.3ms ±13%    -92.59%  (p=0.000 n=19+20)
Index64M-48                 714ms ±10%        56ms ± 8%    -92.22%  (p=0.000 n=19+19)
IndexEasy32-48             52.7ns ±10%      31.0ns ±11%    -41.21%  (p=0.000 n=20+20)
IndexEasy4K-48              139ns ± 5%      1598ns ± 6%  +1046.37%  (p=0.000 n=19+19)
IndexEasy4M-48              179µs ± 8%      1674µs ±10%   +834.31%  (p=0.000 n=19+20)
IndexEasy64M-48            8.56ms ±10%     27.82ms ±16%   +225.14%  (p=0.000 n=19+20)

name                     old speed      new speed        delta
IndexByte32-48           3.52GB/s ± 7%    3.35GB/s ±11%     -4.99%  (p=0.001 n=20+20)
IndexByte4K-48           34.5GB/s ± 7%    33.2GB/s ±10%     -3.67%  (p=0.002 n=20+20)
IndexByte4M-48           24.6GB/s ±14%    22.4GB/s ±14%     -8.73%  (p=0.000 n=20+20)
IndexByte64M-48          8.42GB/s ±16%    8.42GB/s ±19%       ~     (p=0.799 n=20+20)
IndexBytePortable32-48    770MB/s ±13%     756MB/s ±11%       ~     (p=0.383 n=20+20)
IndexBytePortable4K-48   1.34GB/s ±14%    1.26GB/s ±10%     -5.76%  (p=0.018 n=20+20)
IndexBytePortable4M-48   1.35GB/s ±15%    1.31GB/s ±11%       ~     (p=0.157 n=20+20)
IndexBytePortable64M-48  1.25GB/s ±16%    1.22GB/s ±13%       ~     (p=0.640 n=20+20)
Index32-48                138MB/s ± 8%     687MB/s ± 8%   +398.57%  (p=0.000 n=19+20)
Index4K-48               94.9MB/s ± 9%  1280.5MB/s ±11%  +1249.11%  (p=0.000 n=19+20)
Index4M-48               94.6MB/s ± 7%  1278.5MB/s ±12%  +1250.99%  (p=0.000 n=19+20)
Index64M-48              94.2MB/s ±10%  1210.9MB/s ± 8%  +1185.04%  (p=0.000 n=19+19)
IndexEasy32-48            608MB/s ±10%    1035MB/s ±10%    +70.15%  (p=0.000 n=20+20)
IndexEasy4K-48           29.3GB/s ± 6%     2.6GB/s ± 6%    -91.24%  (p=0.000 n=19+19)
IndexEasy4M-48           23.3GB/s ±10%     2.5GB/s ± 9%    -89.23%  (p=0.000 n=20+20)
IndexEasy64M-48          7.86GB/s ±11%    2.42GB/s ±14%    -69.18%  (p=0.000 n=19+20)

Change-Id: Ia191f0a6ca80e113397d9ed98d25f195768b65bc
Reviewed-on: https://go-review.googlesource.com/22550
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
src/bytes/bytes.go
src/bytes/bytes_amd64.go [new file with mode: 0644]
src/bytes/bytes_generic.go [new file with mode: 0644]
src/runtime/asm_amd64.s

index 305c85d9f412fcb449272653c6931eb86af81e16..c35a1c0005f2ce69aa924ce02447ed5133cc37f8 100644 (file)
@@ -93,37 +93,6 @@ func ContainsRune(b []byte, r rune) bool {
        return IndexRune(b, r) >= 0
 }
 
-// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
-func Index(s, sep []byte) int {
-       n := len(sep)
-       if n == 0 {
-               return 0
-       }
-       if n > len(s) {
-               return -1
-       }
-       c := sep[0]
-       if n == 1 {
-               return IndexByte(s, c)
-       }
-       i := 0
-       t := s[:len(s)-n+1]
-       for i < len(t) {
-               if t[i] != c {
-                       o := IndexByte(t[i:], c)
-                       if o < 0 {
-                               break
-                       }
-                       i += o
-               }
-               if Equal(s[i:i+n], sep) {
-                       return i
-               }
-               i++
-       }
-       return -1
-}
-
 func indexBytePortable(s []byte, c byte) int {
        for i, b := range s {
                if b == c {
diff --git a/src/bytes/bytes_amd64.go b/src/bytes/bytes_amd64.go
new file mode 100644 (file)
index 0000000..e8be28b
--- /dev/null
@@ -0,0 +1,69 @@
+// Copyright 2016 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bytes
+
+// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
+// indexShortStr requires 2 <= len(c) <= shortStringLen
+func indexShortStr(s, c []byte) int // ../runtime/asm_$GOARCH.s
+const shortStringLen = 31
+
+// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
+func Index(s, sep []byte) int {
+       n := len(sep)
+       switch {
+       case n == 0:
+               return 0
+       case n == 1:
+               return IndexByte(s, sep[0])
+       case n <= shortStringLen:
+               return indexShortStr(s, sep)
+       case n == len(s):
+               if Equal(sep, s) {
+                       return 0
+               }
+               return -1
+       case n > len(s):
+               return -1
+       }
+       // Rabin-Karp search
+       hashsep, pow := hashStr(sep)
+       var h uint32
+       for i := 0; i < n; i++ {
+               h = h*primeRK + uint32(s[i])
+       }
+       if h == hashsep && Equal(s[:n], sep) {
+               return 0
+       }
+       for i := n; i < len(s); {
+               h *= primeRK
+               h += uint32(s[i])
+               h -= pow * uint32(s[i-n])
+               i++
+               if h == hashsep && Equal(s[i-n:i], sep) {
+                       return i - n
+               }
+       }
+       return -1
+}
+
+// primeRK is the prime base used in Rabin-Karp algorithm.
+const primeRK = 16777619
+
+// hashStr returns the hash and the appropriate multiplicative
+// factor for use in Rabin-Karp algorithm.
+func hashStr(sep []byte) (uint32, uint32) {
+       hash := uint32(0)
+       for i := 0; i < len(sep); i++ {
+               hash = hash*primeRK + uint32(sep[i])
+       }
+       var pow, sq uint32 = 1, primeRK
+       for i := len(sep); i > 0; i >>= 1 {
+               if i&1 != 0 {
+                       pow *= sq
+               }
+               sq *= sq
+       }
+       return hash, pow
+}
diff --git a/src/bytes/bytes_generic.go b/src/bytes/bytes_generic.go
new file mode 100644 (file)
index 0000000..88e232e
--- /dev/null
@@ -0,0 +1,41 @@
+// Copyright 2015 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64
+
+package bytes
+
+// TODO: implements short string optimization on non amd64 platforms
+// and get rid of bytes_amd64.go
+
+// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
+func Index(s, sep []byte) int {
+       n := len(sep)
+       if n == 0 {
+               return 0
+       }
+       if n > len(s) {
+               return -1
+       }
+       c := sep[0]
+       if n == 1 {
+               return IndexByte(s, c)
+       }
+       i := 0
+       t := s[:len(s)-n+1]
+       for i < len(t) {
+               if t[i] != c {
+                       o := IndexByte(t[i:], c)
+                       if o < 0 {
+                               break
+                       }
+                       i += o
+               }
+               if Equal(s[i:i+n], sep) {
+                       return i
+               }
+               i++
+       }
+       return -1
+}
index f44fc1166a42ba89353e4d2e07f31bea202870ff..c9d6b90d800ec78aff972174613528f2b5296174 100644 (file)
@@ -1695,13 +1695,31 @@ big_loop_avx2_exit:
        JMP loop
 
 
-// TODO: Also use this in bytes.Index
 TEXT strings·indexShortStr(SB),NOSPLIT,$0-40
        MOVQ s+0(FP), DI
        // We want len in DX and AX, because PCMPESTRI implicitly consumes them
        MOVQ s_len+8(FP), DX
        MOVQ c+16(FP), BP
        MOVQ c_len+24(FP), AX
+       MOVQ DI, R10
+       LEAQ ret+32(FP), R11
+       JMP  runtime·indexShortStr(SB)
+
+TEXT bytes·indexShortStr(SB),NOSPLIT,$0-56
+       MOVQ s+0(FP), DI
+       MOVQ s_len+8(FP), DX
+       MOVQ c+24(FP), BP
+       MOVQ c_len+32(FP), AX
+       MOVQ DI, R10
+       LEAQ ret+48(FP), R11
+       JMP  runtime·indexShortStr(SB)
+
+// AX: length of string, that we are searching for
+// DX: length of string, in which we are searching
+// DI: pointer to string, in which we are searching
+// BP: pointer to string, that we are searching for
+// R11: address, where to put return value
+TEXT runtime·indexShortStr(SB),NOSPLIT,$0
        CMPQ AX, DX
        JA fail
        CMPQ DX, $16
@@ -1853,7 +1871,7 @@ partial_success17to31:
        CMPQ DI,DX
        JB loop17to31
 fail:
-       MOVQ $-1, ret+32(FP)
+       MOVQ $-1, (R11)
        RET
 sse42:
        MOVL runtime·cpuid_ecx(SB), CX
@@ -1893,8 +1911,8 @@ loop_sse42:
 sse42_success:
        ADDQ CX, DI
 success:
-       SUBQ s+0(FP), DI
-       MOVQ DI, ret+32(FP)
+       SUBQ R10, DI
+       MOVQ DI, (R11)
        RET