]> Cypherpunks repositories - gostls13.git/commitdiff
strings: faster Count, Index
authorDonovan Hide <donovanhide@gmail.com>
Tue, 19 Feb 2013 15:36:15 +0000 (10:36 -0500)
committerRuss Cox <rsc@golang.org>
Tue, 19 Feb 2013 15:36:15 +0000 (10:36 -0500)
Slightly better benchmarks for when string and separator are equivalent and also less branching in inner loops.
benchmark                        old ns/op    new ns/op    delta
BenchmarkGenericNoMatch               3430         3442   +0.35%
BenchmarkGenericMatch1               23590        22855   -3.12%
BenchmarkGenericMatch2              108031       105025   -2.78%
BenchmarkSingleMaxSkipping            2969         2704   -8.93%
BenchmarkSingleLongSuffixFail         2826         2572   -8.99%
BenchmarkSingleMatch                205268       197832   -3.62%
BenchmarkByteByteNoMatch               987          921   -6.69%
BenchmarkByteByteMatch                2014         1749  -13.16%
BenchmarkByteStringMatch              3083         3050   -1.07%
BenchmarkHTMLEscapeNew                 922          915   -0.76%
BenchmarkHTMLEscapeOld                1654         1570   -5.08%
BenchmarkByteByteReplaces            11897        11556   -2.87%
BenchmarkByteByteMap                  4485         4255   -5.13%
BenchmarkIndexRune                     174          121  -30.46%
BenchmarkIndexRuneFastPath              41           41   -0.24%
BenchmarkIndex                          45           44   -0.22%
BenchmarkMapNoChanges                  433          431   -0.46%
BenchmarkIndexHard1                4015336      3316490  -17.40%
BenchmarkIndexHard2                3976254      3395627  -14.60%
BenchmarkIndexHard3                3973158      3378329  -14.97%
BenchmarkCountHard1                4403549      3448512  -21.69%
BenchmarkCountHard2                4387437      3413059  -22.21%
BenchmarkCountHard3                4403891      3382661  -23.19%
BenchmarkIndexTorture                28354        25864   -8.78%
BenchmarkCountTorture                29625        27463   -7.30%
BenchmarkFields                   38752040     39169840   +1.08%
BenchmarkFieldsFunc               38797765     38888060   +0.23%

benchmark                         old MB/s     new MB/s  speedup
BenchmarkSingleMaxSkipping         3367.07      3697.62    1.10x
BenchmarkSingleLongSuffixFail       354.51       389.47    1.10x
BenchmarkSingleMatch                 73.07        75.82    1.04x
BenchmarkFields                      27.06        26.77    0.99x
BenchmarkFieldsFunc                  27.03        26.96    1.00x

R=dave, fullung, remyoudompheng, rsc
CC=golang-dev
https://golang.org/cl/7350045

src/pkg/strings/strings.go
src/pkg/strings/strings_test.go

index 72b8d223af1ad493a078f95dc378f52a1c99ebe4..9203fc51401b1497852ac6eb1a2e054a38549a76 100644 (file)
@@ -59,21 +59,26 @@ func hashstr(sep string) (uint32, uint32) {
 
 // Count counts the number of non-overlapping instances of sep in s.
 func Count(s, sep string) int {
-       if sep == "" {
-               return utf8.RuneCountInString(s) + 1
-       }
-       c := sep[0]
        n := 0
-       if len(sep) == 1 {
+       // special cases
+       switch {
+       case len(sep) == 0:
+               return utf8.RuneCountInString(s) + 1
+       case len(sep) == 1:
                // special case worth making fast
+               c := sep[0]
                for i := 0; i < len(s); i++ {
                        if s[i] == c {
                                n++
                        }
                }
                return n
-       }
-       if len(sep) > len(s) {
+       case len(sep) > len(s):
+               return 0
+       case len(sep) == len(s):
+               if sep == s {
+                       return 1
+               }
                return 0
        }
        hashsep, pow := hashstr(sep)
@@ -82,17 +87,19 @@ func Count(s, sep string) int {
                h = h*primeRK + uint32(s[i])
        }
        lastmatch := 0
-       for i := len(sep); ; i++ {
-               // Invariant: h = hash(s[i-l : i])
+       if h == hashsep && s[:len(sep)] == sep {
+               n++
+               lastmatch = len(sep)
+       }
+       for i := len(sep); i < len(s); {
+               h *= primeRK
+               h += uint32(s[i])
+               h -= pow * uint32(s[i-len(sep)])
+               i++
                if h == hashsep && lastmatch <= i-len(sep) && s[i-len(sep):i] == sep {
                        n++
                        lastmatch = i
                }
-               if i >= len(s) {
-                       break
-               }
-               h = h*primeRK + uint32(s[i])
-               h -= pow * uint32(s[i-len(sep)])
        }
        return n
 }
@@ -115,11 +122,11 @@ func ContainsRune(s string, r rune) bool {
 // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
 func Index(s, sep string) int {
        n := len(sep)
-       if n == 0 {
+       switch {
+       case n == 0:
                return 0
-       }
-       c := sep[0]
-       if n == 1 {
+       case n == 1:
+               c := sep[0]
                // special case worth making fast
                for i := 0; i < len(s); i++ {
                        if s[i] == c {
@@ -127,9 +134,12 @@ func Index(s, sep string) int {
                        }
                }
                return -1
-       }
-       // n > 1
-       if n > len(s) {
+       case n == len(s):
+               if sep == s {
+                       return 0
+               }
+               return -1
+       case n > len(s):
                return -1
        }
        // Hash sep.
@@ -138,16 +148,17 @@ func Index(s, sep string) int {
        for i := 0; i < n; i++ {
                h = h*primeRK + uint32(s[i])
        }
-       for i := n; ; i++ {
-               // Invariant: h = hash(s[i-n : i])
+       if h == hashsep && s[:n] == sep {
+               return 0
+       }
+       for i := n; i < len(s); {
+               h *= primeRK
+               h += uint32(s[i])
+               h -= pow * uint32(s[i-n])
+               i++
                if h == hashsep && s[i-n:i] == sep {
                        return i - n
                }
-               if i >= len(s) {
-                       break
-               }
-               h = h*primeRK + uint32(s[i])
-               h -= pow * uint32(s[i-n])
        }
        return -1
 }
index b5bdf35d15b5ea5be2a708192df06f25f22787bc..2db9e3d1f34b68501b9083496d1a429acccb853d 100644 (file)
@@ -1052,6 +1052,14 @@ func BenchmarkCountTorture(b *testing.B) {
        }
 }
 
+func BenchmarkCountTortureOverlapping(b *testing.B) {
+       A := Repeat("ABC", 1<<20)
+       B := Repeat("ABC", 1<<10)
+       for i := 0; i < b.N; i++ {
+               Count(A, B)
+       }
+}
+
 var makeFieldsInput = func() string {
        x := make([]byte, 1<<20)
        // Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.