]> Cypherpunks repositories - gostls13.git/commitdiff
bytes, strings: add ASCII fast path to EqualFold
authorCharlie Vieth <charlie.vieth@gmail.com>
Wed, 24 Aug 2022 18:23:28 +0000 (14:23 -0400)
committerGopher Robot <gobot@golang.org>
Wed, 21 Sep 2022 14:00:37 +0000 (14:00 +0000)
This commit adds an ASCII fast path to bytes/strings EqualFold that
roughly doubles performance when all characters are ASCII.

It also changes strings.EqualFold to use `for range` for the first
string since this is ~10% faster than using utf8.DecodeRuneInString for
both (see #31666).

Performance (similar results on arm64 and amd64):

name                        old time/op  new time/op  delta
EqualFold/Tests-10           238ns ± 0%   172ns ± 1%  -27.91%  (p=0.000 n=10+10)
EqualFold/ASCII-10          20.5ns ± 0%   9.7ns ± 0%  -52.73%  (p=0.000 n=10+10)
EqualFold/UnicodePrefix-10  86.5ns ± 0%  77.6ns ± 0%  -10.37%  (p=0.000 n=10+10)
EqualFold/UnicodeSuffix-10  86.8ns ± 2%  71.3ns ± 0%  -17.88%  (p=0.000 n=10+8)

Change-Id: I058f3f97a08dc04d65af895674d85420f920abe1
Reviewed-on: https://go-review.googlesource.com/c/go/+/425459
Reviewed-by: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Auto-Submit: Ian Lance Taylor <iant@google.com>
Run-TryBot: Ian Lance Taylor <iant@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/bytes/bytes.go
src/strings/strings.go
src/strings/strings_test.go

index 7b28cf1efcd242d257513af9f7e2c865ebe2723a..119b1f62b1a4f4525e63eb5a243b1ba8c6788b31 100644 (file)
@@ -1134,6 +1134,36 @@ func ReplaceAll(s, old, new []byte) []byte {
 // are equal under simple Unicode case-folding, which is a more general
 // form of case-insensitivity.
 func EqualFold(s, t []byte) bool {
+       // ASCII fast path
+       i := 0
+       for ; i < len(s) && i < len(t); i++ {
+               sr := s[i]
+               tr := t[i]
+               if sr|tr >= utf8.RuneSelf {
+                       goto hasUnicode
+               }
+
+               // Easy case.
+               if tr == sr {
+                       continue
+               }
+
+               // Make sr < tr to simplify what follows.
+               if tr < sr {
+                       tr, sr = sr, tr
+               }
+               // ASCII only, sr/tr must be upper/lower case
+               if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
+                       continue
+               }
+               return false
+       }
+       // Check if we've exhausted both strings.
+       return len(s) == len(t)
+
+hasUnicode:
+       s = s[i:]
+       t = t[i:]
        for len(s) != 0 && len(t) != 0 {
                // Extract first rune from each.
                var sr, tr rune
index 7cf36865694bdd4afc7a5ec7f31d66f01d018fe6..fee161e4cc10f1105f02476a13633e044ca491c9 100644 (file)
@@ -1067,15 +1067,44 @@ func ReplaceAll(s, old, new string) string {
 // are equal under simple Unicode case-folding, which is a more general
 // form of case-insensitivity.
 func EqualFold(s, t string) bool {
-       for s != "" && t != "" {
-               // Extract first rune from each string.
-               var sr, tr rune
-               if s[0] < utf8.RuneSelf {
-                       sr, s = rune(s[0]), s[1:]
-               } else {
-                       r, size := utf8.DecodeRuneInString(s)
-                       sr, s = r, s[size:]
+       // ASCII fast path
+       i := 0
+       for ; i < len(s) && i < len(t); i++ {
+               sr := s[i]
+               tr := t[i]
+               if sr|tr >= utf8.RuneSelf {
+                       goto hasUnicode
                }
+
+               // Easy case.
+               if tr == sr {
+                       continue
+               }
+
+               // Make sr < tr to simplify what follows.
+               if tr < sr {
+                       tr, sr = sr, tr
+               }
+               // ASCII only, sr/tr must be upper/lower case
+               if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
+                       continue
+               }
+               return false
+       }
+       // Check if we've exhausted both strings.
+       return len(s) == len(t)
+
+hasUnicode:
+       s = s[i:]
+       t = t[i:]
+       for _, sr := range s {
+               // If t is exhausted the strings are not equal.
+               if len(t) == 0 {
+                       return false
+               }
+
+               // Extract first rune from second string.
+               var tr rune
                if t[0] < utf8.RuneSelf {
                        tr, t = rune(t[0]), t[1:]
                } else {
@@ -1115,8 +1144,8 @@ func EqualFold(s, t string) bool {
                return false
        }
 
-       // One string is empty. Are both?
-       return s == t
+       // First string is empty, so check if the second one is also empty.
+       return len(t) == 0
 }
 
 // Index returns the index of the first instance of substr in s, or -1 if substr is not present in s.
index 9323ff988d60a88592c6a6f4582da9494322031f..210bd9e44b4739ed11a1016802e38b58b81579fd 100644 (file)
@@ -1556,13 +1556,36 @@ func TestEqualFold(t *testing.T) {
 }
 
 func BenchmarkEqualFold(b *testing.B) {
-       for i := 0; i < b.N; i++ {
-               for _, tt := range EqualFoldTests {
-                       if out := EqualFold(tt.s, tt.t); out != tt.out {
-                               b.Fatal("wrong result")
+       b.Run("Tests", func(b *testing.B) {
+               for i := 0; i < b.N; i++ {
+                       for _, tt := range EqualFoldTests {
+                               if out := EqualFold(tt.s, tt.t); out != tt.out {
+                                       b.Fatal("wrong result")
+                               }
                        }
                }
-       }
+       })
+
+       const s1 = "abcdefghijKz"
+       const s2 = "abcDefGhijKz"
+
+       b.Run("ASCII", func(b *testing.B) {
+               for i := 0; i < b.N; i++ {
+                       EqualFold(s1, s2)
+               }
+       })
+
+       b.Run("UnicodePrefix", func(b *testing.B) {
+               for i := 0; i < b.N; i++ {
+                       EqualFold("αβδ"+s1, "ΑΒΔ"+s2)
+               }
+       })
+
+       b.Run("UnicodeSuffix", func(b *testing.B) {
+               for i := 0; i < b.N; i++ {
+                       EqualFold(s1+"αβδ", s2+"ΑΒΔ")
+               }
+       })
 }
 
 var CountTests = []struct {