From: Donovan Hide Date: Tue, 19 Feb 2013 15:36:15 +0000 (-0500) Subject: strings: faster Count, Index X-Git-Tag: go1.1rc2~968 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=937f91e1daadfe55aa57e3482485494a0765c849;p=gostls13.git strings: faster Count, Index Slightly better benchmarks for when string and separator are equivalent and also less branching in inner loops. benchmark old ns/op new ns/op delta BenchmarkGenericNoMatch 3430 3442 +0.35% BenchmarkGenericMatch1 23590 22855 -3.12% BenchmarkGenericMatch2 108031 105025 -2.78% BenchmarkSingleMaxSkipping 2969 2704 -8.93% BenchmarkSingleLongSuffixFail 2826 2572 -8.99% BenchmarkSingleMatch 205268 197832 -3.62% BenchmarkByteByteNoMatch 987 921 -6.69% BenchmarkByteByteMatch 2014 1749 -13.16% BenchmarkByteStringMatch 3083 3050 -1.07% BenchmarkHTMLEscapeNew 922 915 -0.76% BenchmarkHTMLEscapeOld 1654 1570 -5.08% BenchmarkByteByteReplaces 11897 11556 -2.87% BenchmarkByteByteMap 4485 4255 -5.13% BenchmarkIndexRune 174 121 -30.46% BenchmarkIndexRuneFastPath 41 41 -0.24% BenchmarkIndex 45 44 -0.22% BenchmarkMapNoChanges 433 431 -0.46% BenchmarkIndexHard1 4015336 3316490 -17.40% BenchmarkIndexHard2 3976254 3395627 -14.60% BenchmarkIndexHard3 3973158 3378329 -14.97% BenchmarkCountHard1 4403549 3448512 -21.69% BenchmarkCountHard2 4387437 3413059 -22.21% BenchmarkCountHard3 4403891 3382661 -23.19% BenchmarkIndexTorture 28354 25864 -8.78% BenchmarkCountTorture 29625 27463 -7.30% BenchmarkFields 38752040 39169840 +1.08% BenchmarkFieldsFunc 38797765 38888060 +0.23% benchmark old MB/s new MB/s speedup BenchmarkSingleMaxSkipping 3367.07 3697.62 1.10x BenchmarkSingleLongSuffixFail 354.51 389.47 1.10x BenchmarkSingleMatch 73.07 75.82 1.04x BenchmarkFields 27.06 26.77 0.99x BenchmarkFieldsFunc 27.03 26.96 1.00x R=dave, fullung, remyoudompheng, rsc CC=golang-dev https://golang.org/cl/7350045 --- diff --git a/src/pkg/strings/strings.go b/src/pkg/strings/strings.go index 72b8d223af..9203fc5140 100644 --- a/src/pkg/strings/strings.go +++ b/src/pkg/strings/strings.go @@ -59,21 +59,26 @@ func hashstr(sep string) (uint32, uint32) { // Count counts the number of non-overlapping instances of sep in s. func Count(s, sep string) int { - if sep == "" { - return utf8.RuneCountInString(s) + 1 - } - c := sep[0] n := 0 - if len(sep) == 1 { + // special cases + switch { + case len(sep) == 0: + return utf8.RuneCountInString(s) + 1 + case len(sep) == 1: // special case worth making fast + c := sep[0] for i := 0; i < len(s); i++ { if s[i] == c { n++ } } return n - } - if len(sep) > len(s) { + case len(sep) > len(s): + return 0 + case len(sep) == len(s): + if sep == s { + return 1 + } return 0 } hashsep, pow := hashstr(sep) @@ -82,17 +87,19 @@ func Count(s, sep string) int { h = h*primeRK + uint32(s[i]) } lastmatch := 0 - for i := len(sep); ; i++ { - // Invariant: h = hash(s[i-l : i]) + if h == hashsep && s[:len(sep)] == sep { + n++ + lastmatch = len(sep) + } + for i := len(sep); i < len(s); { + h *= primeRK + h += uint32(s[i]) + h -= pow * uint32(s[i-len(sep)]) + i++ if h == hashsep && lastmatch <= i-len(sep) && s[i-len(sep):i] == sep { n++ lastmatch = i } - if i >= len(s) { - break - } - h = h*primeRK + uint32(s[i]) - h -= pow * uint32(s[i-len(sep)]) } return n } @@ -115,11 +122,11 @@ func ContainsRune(s string, r rune) bool { // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. func Index(s, sep string) int { n := len(sep) - if n == 0 { + switch { + case n == 0: return 0 - } - c := sep[0] - if n == 1 { + case n == 1: + c := sep[0] // special case worth making fast for i := 0; i < len(s); i++ { if s[i] == c { @@ -127,9 +134,12 @@ func Index(s, sep string) int { } } return -1 - } - // n > 1 - if n > len(s) { + case n == len(s): + if sep == s { + return 0 + } + return -1 + case n > len(s): return -1 } // Hash sep. @@ -138,16 +148,17 @@ func Index(s, sep string) int { for i := 0; i < n; i++ { h = h*primeRK + uint32(s[i]) } - for i := n; ; i++ { - // Invariant: h = hash(s[i-n : i]) + if h == hashsep && s[:n] == sep { + return 0 + } + for i := n; i < len(s); { + h *= primeRK + h += uint32(s[i]) + h -= pow * uint32(s[i-n]) + i++ if h == hashsep && s[i-n:i] == sep { return i - n } - if i >= len(s) { - break - } - h = h*primeRK + uint32(s[i]) - h -= pow * uint32(s[i-n]) } return -1 } diff --git a/src/pkg/strings/strings_test.go b/src/pkg/strings/strings_test.go index b5bdf35d15..2db9e3d1f3 100644 --- a/src/pkg/strings/strings_test.go +++ b/src/pkg/strings/strings_test.go @@ -1052,6 +1052,14 @@ func BenchmarkCountTorture(b *testing.B) { } } +func BenchmarkCountTortureOverlapping(b *testing.B) { + A := Repeat("ABC", 1<<20) + B := Repeat("ABC", 1<<10) + for i := 0; i < b.N; i++ { + Count(A, B) + } +} + var makeFieldsInput = func() string { x := make([]byte, 1<<20) // Input is ~10% space, ~10% 2-byte UTF-8, rest ASCII non-space.