]> Cypherpunks repositories - gostls13.git/commitdiff
bytes, strings: optimize for ASCII sets
authorJoe Tsai <joetsai@digital-static.net>
Thu, 20 Oct 2016 10:16:22 +0000 (03:16 -0700)
committerJoe Tsai <thebrokentoaster@gmail.com>
Fri, 28 Oct 2016 17:37:04 +0000 (17:37 +0000)
In a large codebase within Google, there are thousands of uses of:
ContainsAny|IndexAny|LastIndexAny|Trim|TrimLeft|TrimRight

An analysis of their usage shows that over 97% of them only use character
sets consisting of only ASCII symbols.

Uses of ContainsAny|IndexAny|LastIndexAny:
 6% are 1   character  (e.g., "\n" or " ")
58% are 2-4 characters (e.g., "<>" or "\r\n\t ")
24% are 5-9 characters (e.g., "()[]*^$")
10% are 10+ characters (e.g., "+-=&|><!(){}[]^\"~*?:\\/ ")
We optimize for ASCII sets, which are commonly used to search for
"control" characters in some string. We don't optimize for the
single character scenario since IndexRune or IndexByte could be used.

Uses of Trim|TrimLeft|TrimRight:
71% are 1   character  (e.g., "\n" or " ")
14% are 2   characters (e.g., "\r\n")
10% are 3-4 characters (e.g., " \t\r\n")
 5% are 10+ characters (e.g., "0123456789abcdefABCDEF")
We optimize for the single character case with a simple closured function
that only checks for that character's value. We optimize for the medium
and larger sets using a 16-byte bit-map representing a set of ASCII characters.

The benchmarks below have the following suffix name "%d:%d" where the first
number is the length of the input and the second number is the length
of the charset.

== bytes package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.09          5.23          +2.75%
BenchmarkIndexAnyASCII/1:2-4         5.81          5.85          +0.69%
BenchmarkIndexAnyASCII/1:4-4         7.22          7.50          +3.88%
BenchmarkIndexAnyASCII/1:8-4         11.0          11.1          +0.91%
BenchmarkIndexAnyASCII/1:16-4        17.5          17.8          +1.71%
BenchmarkIndexAnyASCII/16:1-4        36.0          34.0          -5.56%
BenchmarkIndexAnyASCII/16:2-4        46.6          36.5          -21.67%
BenchmarkIndexAnyASCII/16:4-4        78.0          40.4          -48.21%
BenchmarkIndexAnyASCII/16:8-4        136           47.4          -65.15%
BenchmarkIndexAnyASCII/16:16-4       254           61.5          -75.79%
BenchmarkIndexAnyASCII/256:1-4       542           388           -28.41%
BenchmarkIndexAnyASCII/256:2-4       705           382           -45.82%
BenchmarkIndexAnyASCII/256:4-4       1089          386           -64.55%
BenchmarkIndexAnyASCII/256:8-4       1994          394           -80.24%
BenchmarkIndexAnyASCII/256:16-4      3843          411           -89.31%
BenchmarkIndexAnyASCII/4096:1-4      8522          5873          -31.08%
BenchmarkIndexAnyASCII/4096:2-4      11253         5861          -47.92%
BenchmarkIndexAnyASCII/4096:4-4      17824         5883          -66.99%
BenchmarkIndexAnyASCII/4096:8-4      32053         5871          -81.68%
BenchmarkIndexAnyASCII/4096:16-4     60512         5888          -90.27%
BenchmarkTrimASCII/1:1-4             79.5          70.8          -10.94%
BenchmarkTrimASCII/1:2-4             79.0          105           +32.91%
BenchmarkTrimASCII/1:4-4             79.6          109           +36.93%
BenchmarkTrimASCII/1:8-4             78.8          118           +49.75%
BenchmarkTrimASCII/1:16-4            80.2          132           +64.59%
BenchmarkTrimASCII/16:1-4            243           116           -52.26%
BenchmarkTrimASCII/16:2-4            243           171           -29.63%
BenchmarkTrimASCII/16:4-4            243           176           -27.57%
BenchmarkTrimASCII/16:8-4            241           184           -23.65%
BenchmarkTrimASCII/16:16-4           238           199           -16.39%
BenchmarkTrimASCII/256:1-4           2580          840           -67.44%
BenchmarkTrimASCII/256:2-4           2603          1175          -54.86%
BenchmarkTrimASCII/256:4-4           2572          1188          -53.81%
BenchmarkTrimASCII/256:8-4           2550          1191          -53.29%
BenchmarkTrimASCII/256:16-4          2585          1208          -53.27%
BenchmarkTrimASCII/4096:1-4          39773         12181         -69.37%
BenchmarkTrimASCII/4096:2-4          39946         17231         -56.86%
BenchmarkTrimASCII/4096:4-4          39641         17179         -56.66%
BenchmarkTrimASCII/4096:8-4          39835         17175         -56.88%
BenchmarkTrimASCII/4096:16-4         40229         17215         -57.21%

== strings package ==
benchmark                            old ns/op     new ns/op     delta
BenchmarkIndexAnyASCII/1:1-4         5.94          4.97          -16.33%
BenchmarkIndexAnyASCII/1:2-4         5.94          5.55          -6.57%
BenchmarkIndexAnyASCII/1:4-4         7.45          7.21          -3.22%
BenchmarkIndexAnyASCII/1:8-4         10.8          10.6          -1.85%
BenchmarkIndexAnyASCII/1:16-4        17.4          17.2          -1.15%
BenchmarkIndexAnyASCII/16:1-4        36.4          32.2          -11.54%
BenchmarkIndexAnyASCII/16:2-4        49.6          34.6          -30.24%
BenchmarkIndexAnyASCII/16:4-4        77.5          37.9          -51.10%
BenchmarkIndexAnyASCII/16:8-4        138           45.5          -67.03%
BenchmarkIndexAnyASCII/16:16-4       241           59.1          -75.48%
BenchmarkIndexAnyASCII/256:1-4       509           378           -25.74%
BenchmarkIndexAnyASCII/256:2-4       720           381           -47.08%
BenchmarkIndexAnyASCII/256:4-4       1142          384           -66.37%
BenchmarkIndexAnyASCII/256:8-4       1999          391           -80.44%
BenchmarkIndexAnyASCII/256:16-4      3735          403           -89.21%
BenchmarkIndexAnyASCII/4096:1-4      7973          5824          -26.95%
BenchmarkIndexAnyASCII/4096:2-4      11432         5809          -49.19%
BenchmarkIndexAnyASCII/4096:4-4      18327         5819          -68.25%
BenchmarkIndexAnyASCII/4096:8-4      33059         5828          -82.37%
BenchmarkIndexAnyASCII/4096:16-4     59703         5817          -90.26%
BenchmarkTrimASCII/1:1-4             71.9          71.8          -0.14%
BenchmarkTrimASCII/1:2-4             73.3          103           +40.52%
BenchmarkTrimASCII/1:4-4             71.8          106           +47.63%
BenchmarkTrimASCII/1:8-4             71.2          113           +58.71%
BenchmarkTrimASCII/1:16-4            71.6          128           +78.77%
BenchmarkTrimASCII/16:1-4            152           116           -23.68%
BenchmarkTrimASCII/16:2-4            160           168           +5.00%
BenchmarkTrimASCII/16:4-4            172           170           -1.16%
BenchmarkTrimASCII/16:8-4            200           177           -11.50%
BenchmarkTrimASCII/16:16-4           254           193           -24.02%
BenchmarkTrimASCII/256:1-4           1438          864           -39.92%
BenchmarkTrimASCII/256:2-4           1551          1195          -22.95%
BenchmarkTrimASCII/256:4-4           1770          1200          -32.20%
BenchmarkTrimASCII/256:8-4           2195          1216          -44.60%
BenchmarkTrimASCII/256:16-4          3054          1224          -59.92%
BenchmarkTrimASCII/4096:1-4          21726         12557         -42.20%
BenchmarkTrimASCII/4096:2-4          23586         17508         -25.77%
BenchmarkTrimASCII/4096:4-4          26898         17510         -34.90%
BenchmarkTrimASCII/4096:8-4          33714         17595         -47.81%
BenchmarkTrimASCII/4096:16-4         47429         17700         -62.68%

The benchmarks added test the worst case. For IndexAny, that is when the
charset matches none of the input. For Trim, it is when the charset matches
all of the input.

Change-Id: I970874d101a96b33528fc99b165379abe58cf6ea
Reviewed-on: https://go-review.googlesource.com/31593
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Reviewed-by: Martin Möhrmann <martisch@uos.de>
src/bytes/bytes.go
src/bytes/bytes_test.go
src/strings/strings.go
src/strings/strings_test.go

index 40c7c23cd78b4f0288ee896420255217a0c683fd..406a38257a74c420266dc013782fd9c91256686f 100644 (file)
@@ -160,10 +160,19 @@ func IndexRune(s []byte, r rune) int {
 // point in common.
 func IndexAny(s []byte, chars string) int {
        if len(chars) > 0 {
-               var r rune
+               if len(s) > 8 {
+                       if as, isASCII := makeASCIISet(chars); isASCII {
+                               for i, c := range s {
+                                       if as.contains(c) {
+                                               return i
+                                       }
+                               }
+                               return -1
+                       }
+               }
                var width int
                for i := 0; i < len(s); i += width {
-                       r = rune(s[i])
+                       r := rune(s[i])
                        if r < utf8.RuneSelf {
                                width = 1
                        } else {
@@ -185,11 +194,21 @@ func IndexAny(s []byte, chars string) int {
 // there is no code point in common.
 func LastIndexAny(s []byte, chars string) int {
        if len(chars) > 0 {
+               if len(s) > 8 {
+                       if as, isASCII := makeASCIISet(chars); isASCII {
+                               for i := len(s) - 1; i >= 0; i-- {
+                                       if as.contains(s[i]) {
+                                               return i
+                                       }
+                               }
+                               return -1
+                       }
+               }
                for i := len(s); i > 0; {
-                       r, size := utf8.DecodeLastRune(s[0:i])
+                       r, size := utf8.DecodeLastRune(s[:i])
                        i -= size
-                       for _, ch := range chars {
-                               if r == ch {
+                       for _, c := range chars {
+                               if r == c {
                                        return i
                                }
                        }
@@ -573,7 +592,43 @@ func lastIndexFunc(s []byte, f func(r rune) bool, truth bool) int {
        return -1
 }
 
+// asciiSet is a 32-byte value, where each bit represents the presence of a
+// given ASCII character in the set. The 128-bits of the lower 16 bytes,
+// starting with the least-significant bit of the lowest word to the
+// most-significant bit of the highest word, map to the full range of all
+// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
+// ensuring that any non-ASCII character will be reported as not in the set.
+type asciiSet [8]uint32
+
+// makeASCIISet creates a set of ASCII characters and reports whether all
+// characters in chars are ASCII.
+func makeASCIISet(chars string) (as asciiSet, ok bool) {
+       for i := 0; i < len(chars); i++ {
+               c := chars[i]
+               if c >= utf8.RuneSelf {
+                       return as, false
+               }
+               as[c>>5] |= 1 << uint(c&31)
+       }
+       return as, true
+}
+
+// contains reports whether c is inside the set.
+func (as *asciiSet) contains(c byte) bool {
+       return (as[c>>5] & (1 << uint(c&31))) != 0
+}
+
 func makeCutsetFunc(cutset string) func(r rune) bool {
+       if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
+               return func(r rune) bool {
+                       return r == rune(cutset[0])
+               }
+       }
+       if as, isASCII := makeASCIISet(cutset); isASCII {
+               return func(r rune) bool {
+                       return r < utf8.RuneSelf && as.contains(byte(r))
+               }
+       }
        return func(r rune) bool {
                for _, c := range cutset {
                        if c == r {
index 146dc42b0de5b9112a45917847ffbb9c99fd1081..26eac5e08c6df35cb6be2da6a84de310b2ff1ffc 100644 (file)
@@ -167,8 +167,12 @@ var indexAnyTests = []BinOpTest{
        {"abc", "xyz", -1},
        {"abc", "xcz", 2},
        {"ab☺c", "x☺yz", 2},
+       {"a☺b☻c☹d", "cx", len("a☺b☻")},
+       {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
        {"aRegExp*", ".(|)*+?^$[]", 7},
        {dots + dots + dots, " ", -1},
+       {"012abcba210", "\xffb", 4},
+       {"012\x80bcb\x80210", "\xffb", 3},
 }
 
 var lastIndexAnyTests = []BinOpTest{
@@ -180,9 +184,13 @@ var lastIndexAnyTests = []BinOpTest{
        {"aaa", "a", 2},
        {"abc", "xyz", -1},
        {"abc", "ab", 1},
-       {"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")},
+       {"ab☺c", "x☺yz", 2},
+       {"a☺b☻c☹d", "cx", len("a☺b☻")},
+       {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
        {"a.RegExp*", ".(|)*+?^$[]", 8},
        {dots + dots + dots, " ", -1},
+       {"012abcba210", "\xffb", 6},
+       {"012\x80bcb\x80210", "\xffb", 7},
 }
 
 // Execute f on each test case.  funcName should be the name of f; it's used
@@ -1029,6 +1037,9 @@ var trimTests = []TrimTest{
        {"Trim", "* listitem", " *", "listitem"},
        {"Trim", `"quote"`, `"`, "quote"},
        {"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
+       {"Trim", "\x80test\xff", "\xff", "test"},
+       {"Trim", " Ġ ", " ", "Ġ"},
+       {"Trim", " Ġİ0", "0 ", "Ġİ"},
        //empty string tests
        {"Trim", "abba", "", "abba"},
        {"Trim", "", "123", ""},
@@ -1448,3 +1459,31 @@ func BenchmarkBytesCompare(b *testing.B) {
                })
        }
 }
+
+func BenchmarkIndexAnyASCII(b *testing.B) {
+       x := Repeat([]byte{'#'}, 4096) // Never matches set
+       cs := "0123456789abcdef"
+       for k := 1; k <= 4096; k <<= 4 {
+               for j := 1; j <= 16; j <<= 1 {
+                       b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+                               for i := 0; i < b.N; i++ {
+                                       IndexAny(x[:k], cs[:j])
+                               }
+                       })
+               }
+       }
+}
+
+func BenchmarkTrimASCII(b *testing.B) {
+       cs := "0123456789abcdef"
+       for k := 1; k <= 4096; k <<= 4 {
+               for j := 1; j <= 16; j <<= 1 {
+                       b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+                               x := Repeat([]byte(cs[:j]), k) // Always matches set
+                               for i := 0; i < b.N; i++ {
+                                       Trim(x[:k], cs[:j])
+                               }
+                       })
+               }
+       }
+}
index 349989278d93a327f9c4c02b48b14c60771b55b4..60a281a6ac50dda9204bed74d5d510d0596a183b 100644 (file)
@@ -169,6 +169,16 @@ func IndexRune(s string, r rune) int {
 // from chars in s, or -1 if no Unicode code point from chars is present in s.
 func IndexAny(s, chars string) int {
        if len(chars) > 0 {
+               if len(s) > 8 {
+                       if as, isASCII := makeASCIISet(chars); isASCII {
+                               for i := 0; i < len(s); i++ {
+                                       if as.contains(s[i]) {
+                                               return i
+                                       }
+                               }
+                               return -1
+                       }
+               }
                for i, c := range s {
                        for _, m := range chars {
                                if c == m {
@@ -185,11 +195,21 @@ func IndexAny(s, chars string) int {
 // present in s.
 func LastIndexAny(s, chars string) int {
        if len(chars) > 0 {
+               if len(s) > 8 {
+                       if as, isASCII := makeASCIISet(chars); isASCII {
+                               for i := len(s) - 1; i >= 0; i-- {
+                                       if as.contains(s[i]) {
+                                               return i
+                                       }
+                               }
+                               return -1
+                       }
+               }
                for i := len(s); i > 0; {
-                       rune, size := utf8.DecodeLastRuneInString(s[0:i])
+                       r, size := utf8.DecodeLastRuneInString(s[:i])
                        i -= size
-                       for _, m := range chars {
-                               if rune == m {
+                       for _, c := range chars {
+                               if r == c {
                                        return i
                                }
                        }
@@ -570,7 +590,43 @@ func lastIndexFunc(s string, f func(rune) bool, truth bool) int {
        return -1
 }
 
+// asciiSet is a 32-byte value, where each bit represents the presence of a
+// given ASCII character in the set. The 128-bits of the lower 16 bytes,
+// starting with the least-significant bit of the lowest word to the
+// most-significant bit of the highest word, map to the full range of all
+// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
+// ensuring that any non-ASCII character will be reported as not in the set.
+type asciiSet [8]uint32
+
+// makeASCIISet creates a set of ASCII characters and reports whether all
+// characters in chars are ASCII.
+func makeASCIISet(chars string) (as asciiSet, ok bool) {
+       for i := 0; i < len(chars); i++ {
+               c := chars[i]
+               if c >= utf8.RuneSelf {
+                       return as, false
+               }
+               as[c>>5] |= 1 << uint(c&31)
+       }
+       return as, true
+}
+
+// contains reports whether c is inside the set.
+func (as *asciiSet) contains(c byte) bool {
+       return (as[c>>5] & (1 << uint(c&31))) != 0
+}
+
 func makeCutsetFunc(cutset string) func(rune) bool {
+       if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
+               return func(r rune) bool {
+                       return r == rune(cutset[0])
+               }
+       }
+       if as, isASCII := makeASCIISet(cutset); isASCII {
+               return func(r rune) bool {
+                       return r < utf8.RuneSelf && as.contains(byte(r))
+               }
+       }
        return func(r rune) bool { return IndexRune(cutset, r) >= 0 }
 }
 
index 6815944899d1824800541f43d28e7db351a573f7..68b5943c59c60ba38321422f989c56567b530ae1 100644 (file)
@@ -152,10 +152,15 @@ var indexAnyTests = []IndexTest{
        {"aaa", "a", 0},
        {"abc", "xyz", -1},
        {"abc", "xcz", 2},
-       {"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")},
+       {"ab☺c", "x☺yz", 2},
+       {"a☺b☻c☹d", "cx", len("a☺b☻")},
+       {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
        {"aRegExp*", ".(|)*+?^$[]", 7},
        {dots + dots + dots, " ", -1},
+       {"012abcba210", "\xffb", 4},
+       {"012\x80bcb\x80210", "\xffb", 3},
 }
+
 var lastIndexAnyTests = []IndexTest{
        {"", "", -1},
        {"", "a", -1},
@@ -165,9 +170,13 @@ var lastIndexAnyTests = []IndexTest{
        {"aaa", "a", 2},
        {"abc", "xyz", -1},
        {"abc", "ab", 1},
-       {"a☺b☻c☹d", "uvw☻xyz", 2 + len("☺")},
+       {"ab☺c", "x☺yz", 2},
+       {"a☺b☻c☹d", "cx", len("a☺b☻")},
+       {"a☺b☻c☹d", "uvw☻xyz", len("a☺b")},
        {"a.RegExp*", ".(|)*+?^$[]", 8},
        {dots + dots + dots, " ", -1},
+       {"012abcba210", "\xffb", 6},
+       {"012\x80bcb\x80210", "\xffb", 7},
 }
 
 // Execute f on each test case.  funcName should be the name of f; it's used
@@ -668,6 +677,9 @@ var trimTests = []struct {
        {"Trim", "* listitem", " *", "listitem"},
        {"Trim", `"quote"`, `"`, "quote"},
        {"Trim", "\u2C6F\u2C6F\u0250\u0250\u2C6F\u2C6F", "\u2C6F", "\u0250\u0250"},
+       {"Trim", "\x80test\xff", "\xff", "test"},
+       {"Trim", " Ġ ", " ", "Ġ"},
+       {"Trim", " Ġİ0", "0 ", "Ġİ"},
        //empty string tests
        {"Trim", "abba", "", "abba"},
        {"Trim", "", "123", ""},
@@ -1487,3 +1499,31 @@ func BenchmarkRepeat(b *testing.B) {
                Repeat("-", 80)
        }
 }
+
+func BenchmarkIndexAnyASCII(b *testing.B) {
+       x := Repeat("#", 4096) // Never matches set
+       cs := "0123456789abcdef"
+       for k := 1; k <= 4096; k <<= 4 {
+               for j := 1; j <= 16; j <<= 1 {
+                       b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+                               for i := 0; i < b.N; i++ {
+                                       IndexAny(x[:k], cs[:j])
+                               }
+                       })
+               }
+       }
+}
+
+func BenchmarkTrimASCII(b *testing.B) {
+       cs := "0123456789abcdef"
+       for k := 1; k <= 4096; k <<= 4 {
+               for j := 1; j <= 16; j <<= 1 {
+                       b.Run(fmt.Sprintf("%d:%d", k, j), func(b *testing.B) {
+                               x := Repeat(cs[:j], k) // Always matches set
+                               for i := 0; i < b.N; i++ {
+                                       Trim(x[:k], cs[:j])
+                               }
+                       })
+               }
+       }
+}