bytes,strings: improve IndexRune performance by ~45%

author Charlie Vieth <charlie.vieth@gmail.com>

Thu, 2 Nov 2023 04:18:59 +0000 (00:18 -0400)

committer Gopher Robot <gobot@golang.org>

Wed, 14 Aug 2024 18:25:29 +0000 (18:25 +0000)
author Charlie Vieth <charlie.vieth@gmail.com>
Thu, 2 Nov 2023 04:18:59 +0000 (00:18 -0400)
committer Gopher Robot <gobot@golang.org>
Wed, 14 Aug 2024 18:25:29 +0000 (18:25 +0000)
diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go

index 5c03e54d78761f9e4fd137a0a5dc4b547efcc4c5..ac15ab9b69a56d32ee953173c453dc6c07c20443 100644 (file)
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -137,6 +137,7 @@ func LastIndexByte(s []byte, c byte) int {
  // If r is [utf8.RuneError], it returns the first instance of any
  // invalid UTF-8 byte sequence.
  func IndexRune(s []byte, r rune) int {
+       const haveFastIndex = bytealg.MaxBruteForce > 0
         switch {
         case 0 <= r && r < utf8.RuneSelf:
                 return IndexByte(s, byte(r))
@@ -152,9 +153,64 @@ func IndexRune(s []byte, r rune) int {
         case !utf8.ValidRune(r):
                 return -1
         default:
+               // Search for rune r using the last byte of its UTF-8 encoded form.
+               // The distribution of the last byte is more uniform compared to the
+               // first byte which has a 78% chance of being [240, 243, 244].
                 var b [utf8.UTFMax]byte
                 n := utf8.EncodeRune(b[:], r)
-               return Index(s, b[:n])
+               last := n - 1
+               i := last
+               fails := 0
+               for i < len(s) {
+                       if s[i] != b[last] {
+                               o := IndexByte(s[i+1:], b[last])
+                               if o < 0 {
+                                       return -1
+                               }
+                               i += o + 1
+                       }
+                       // Step backwards comparing bytes.
+                       for j := 1; j < n; j++ {
+                               if s[i-j] != b[last-j] {
+                                       goto next
+                               }
+                       }
+                       return i - last
+               next:
+                       fails++
+                       i++
+                       if (haveFastIndex && fails > bytealg.Cutover(i)) && i < len(s) ||
+                               (!haveFastIndex && fails >= 4+i>>4 && i < len(s)) {
+                               goto fallback
+                       }
+               }
+               return -1
+
+       fallback:
+               // Switch to bytealg.Index, if available, or a brute force search when
+               // IndexByte returns too many false positives.
+               if haveFastIndex {
+                       if j := bytealg.Index(s[i-last:], b[:n]); j >= 0 {
+                               return i + j - last
+                       }
+               } else {
+                       // If bytealg.Index is not available a brute force search is
+                       // ~1.5-3x faster than Rabin-Karp since n is small.
+                       c0 := b[last]
+                       c1 := b[last-1] // There are at least 2 chars to match
+               loop:
+                       for ; i < len(s); i++ {
+                               if s[i] == c0 && s[i-1] == c1 {
+                                       for k := 2; k < n; k++ {
+                                               if s[i-k] != b[last-k] {
+                                                       continue loop
+                                               }
+                                       }
+                                       return i - last
+                               }
+                       }
+               }
+               return -1
         }
  }
  
diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go

index 637880a4f7272c4ee8f6b49201f7a69c9bc0e446..da16882e82a5807f0d2cdc5ebf47278a8cddb305 100644 (file)
--- a/src/bytes/bytes_test.go
+++ b/src/bytes/bytes_test.go
@@ -197,6 +197,11 @@ var indexTests = []BinOpTest{
         {"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
         // test fallback to Rabin-Karp.
         {"000000000000000000000000000000000000000000000000000000000000000000000001", "0000000000000000000000000000000000000000000000000000000000000000001", 5},
+       // test fallback to IndexRune
+       {"oxoxoxoxoxoxoxoxoxoxox☺", "☺", 22},
+       // invalid UTF-8 byte sequence (must be longer than bytealg.MaxBruteForce to
+       // test that we don't use IndexRune)
+       {"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx\xed\x9f\xc0", "\xed\x9f\xc0", 105},
  }
  
  var lastIndexTests = []BinOpTest{
@@ -445,6 +450,31 @@ func TestIndexRune(t *testing.T) {
                 {"some_text=some_value", '=', 9},
                 {"☺a", 'a', 3},
                 {"a☻☺b", '☺', 4},
+               {"𠀳𠀗𠀾𠁄𠀧𠁆𠁂𠀫𠀖𠀪𠀲𠀴𠁀𠀨𠀿", '𠀿', 56},
+
+               // 2 bytes
+               {"ӆ", 'ӆ', 0},
+               {"a", 'ӆ', -1},
+               {"  ӆ", 'ӆ', 2},
+               {"  a", 'ӆ', -1},
+               {strings.Repeat("ц", 64) + "ӆ", 'ӆ', 128}, // test cutover
+               {strings.Repeat("ц", 64), 'ӆ', -1},
+
+               // 3 bytes
+               {"Ꚁ", 'Ꚁ', 0},
+               {"a", 'Ꚁ', -1},
+               {"  Ꚁ", 'Ꚁ', 2},
+               {"  a", 'Ꚁ', -1},
+               {strings.Repeat("Ꙁ", 64) + "Ꚁ", 'Ꚁ', 192}, // test cutover
+               {strings.Repeat("Ꙁ", 64) + "Ꚁ", '䚀', -1},  // 'Ꚁ' and '䚀' share the same last two bytes
+
+               // 4 bytes
+               {"𡌀", '𡌀', 0},
+               {"a", '𡌀', -1},
+               {"  𡌀", '𡌀', 2},
+               {"  a", '𡌀', -1},
+               {strings.Repeat("𡋀", 64) + "𡌀", '𡌀', 256}, // test cutover
+               {strings.Repeat("𡋀", 64) + "𡌀", '𣌀', -1},  // '𡌀' and '𣌀' share the same last two bytes
  
                 // RuneError should match any invalid UTF-8 byte sequence.
                 {"�", '�', 0},
@@ -458,6 +488,13 @@ func TestIndexRune(t *testing.T) {
                 {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", -1, -1},
                 {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", 0xD800, -1}, // Surrogate pair
                 {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", utf8.MaxRune + 1, -1},
+
+               // Test the cutover to bytealg.Index when it is triggered in
+               // the middle of a rune that contains consecutive runs of equal bytes.
+               {"aaaaaKKKK\U000bc104", '\U000bc104', 17}, // cutover: (n + 16) / 8
+               {"aaaaaKKKK鄄", '鄄', 17},
+               {"aaKKKKKa\U000bc104", '\U000bc104', 18}, // cutover: 4 + n>>4
+               {"aaKKKKKa鄄", '鄄', 18},
         }
         for _, tt := range tests {
                 if got := IndexRune([]byte(tt.in), tt.rune); got != tt.want {
@@ -605,6 +642,21 @@ func BenchmarkIndexRuneASCII(b *testing.B) {
         benchBytes(b, indexSizes, bmIndexRuneASCII(IndexRune))
  }
  
+func BenchmarkIndexRuneUnicode(b *testing.B) {
+       b.Run("Latin", func(b *testing.B) {
+               // Latin is mostly 1, 2, 3 byte runes.
+               benchBytes(b, indexSizes, bmIndexRuneUnicode(unicode.Latin, 'é'))
+       })
+       b.Run("Cyrillic", func(b *testing.B) {
+               // Cyrillic is mostly 2 and 3 byte runes.
+               benchBytes(b, indexSizes, bmIndexRuneUnicode(unicode.Cyrillic, 'Ꙁ'))
+       })
+       b.Run("Han", func(b *testing.B) {
+               // Han consists only of 3 and 4 byte runes.
+               benchBytes(b, indexSizes, bmIndexRuneUnicode(unicode.Han, '𠀿'))
+       })
+}
+
  func bmIndexRuneASCII(index func([]byte, rune) int) func(b *testing.B, n int) {
         return func(b *testing.B, n int) {
                 buf := bmbuf[0:n]
@@ -635,6 +687,61 @@ func bmIndexRune(index func([]byte, rune) int) func(b *testing.B, n int) {
         }
  }
  
+func bmIndexRuneUnicode(rt *unicode.RangeTable, needle rune) func(b *testing.B, n int) {
+       var rs []rune
+       for _, r16 := range rt.R16 {
+               for r := rune(r16.Lo); r <= rune(r16.Hi); r += rune(r16.Stride) {
+                       if r != needle {
+                               rs = append(rs, rune(r))
+                       }
+               }
+       }
+       for _, r32 := range rt.R32 {
+               for r := rune(r32.Lo); r <= rune(r32.Hi); r += rune(r32.Stride) {
+                       if r != needle {
+                               rs = append(rs, rune(r))
+                       }
+               }
+       }
+       // Shuffle the runes so that they are not left in the ascending order
+       // in which they were collected from the range table. The shuffle uses
+       // a fixed seed, so it is deterministic: benchmarks need to be repeatable.
+       rr := rand.New(rand.NewSource(1))
+       rr.Shuffle(len(rs), func(i, j int) {
+               rs[i], rs[j] = rs[j], rs[i]
+       })
+       uchars := string(rs)
+
+       return func(b *testing.B, n int) {
+               buf := bmbuf[0:n]
+               o := copy(buf, uchars)
+               for o < len(buf) {
+                       o += copy(buf[o:], uchars)
+               }
+
+               // Make space for the needle rune at the end of buf.
+               m := utf8.RuneLen(needle)
+               for o := m; o > 0; {
+                       _, sz := utf8.DecodeLastRune(buf)
+                       copy(buf[len(buf)-sz:], "\x00\x00\x00\x00")
+                       buf = buf[:len(buf)-sz]
+                       o -= sz
+               }
+               buf = utf8.AppendRune(buf[:n-m], needle)
+
+               n -= m // adjust for rune len
+               for i := 0; i < b.N; i++ {
+                       j := IndexRune(buf, needle)
+                       if j != n {
+                               b.Fatal("bad index", j)
+                       }
+               }
+               for i := range buf {
+                       buf[i] = '\x00'
+               }
+       }
+}
+
  func BenchmarkEqual(b *testing.B) {
         b.Run("0", func(b *testing.B) {
                 var buf [4]byte
@@ -2077,6 +2184,11 @@ func makeBenchInputHard() []byte {
  var benchInputHard = makeBenchInputHard()
  
  func benchmarkIndexHard(b *testing.B, sep []byte) {
+       n := Index(benchInputHard, sep)
+       if n < 0 {
+               n = len(benchInputHard)
+       }
+       b.SetBytes(int64(n))
         for i := 0; i < b.N; i++ {
                 Index(benchInputHard, sep)
         }
diff --git a/src/strings/strings.go b/src/strings/strings.go

index 0729c4ad425d34427eca9422a4777415fda22664..7eb2de635c3b3c7a2307553aa06da94a9ecac485 100644 (file)
--- a/src/strings/strings.go
+++ b/src/strings/strings.go
@@ -125,6 +125,7 @@ func IndexByte(s string, c byte) int {
  // If r is [utf8.RuneError], it returns the first instance of any
  // invalid UTF-8 byte sequence.
  func IndexRune(s string, r rune) int {
+       const haveFastIndex = bytealg.MaxBruteForce > 0
         switch {
         case 0 <= r && r < utf8.RuneSelf:
                 return IndexByte(s, byte(r))
@@ -138,7 +139,60 @@ func IndexRune(s string, r rune) int {
         case !utf8.ValidRune(r):
                 return -1
         default:
-               return Index(s, string(r))
+               // Search for rune r using the last byte of its UTF-8 encoded form.
+               // The distribution of the last byte is more uniform compared to the
+               // first byte which has a 78% chance of being [240, 243, 244].
+               rs := string(r)
+               last := len(rs) - 1
+               i := last
+               fails := 0
+               for i < len(s) {
+                       if s[i] != rs[last] {
+                               o := IndexByte(s[i+1:], rs[last])
+                               if o < 0 {
+                                       return -1
+                               }
+                               i += o + 1
+                       }
+                       // Step backwards comparing bytes.
+                       for j := 1; j < len(rs); j++ {
+                               if s[i-j] != rs[last-j] {
+                                       goto next
+                               }
+                       }
+                       return i - last
+               next:
+                       fails++
+                       i++
+                       if (haveFastIndex && fails > bytealg.Cutover(i)) && i < len(s) ||
+                               (!haveFastIndex && fails >= 4+i>>4 && i < len(s)) {
+                               goto fallback
+                       }
+               }
+               return -1
+
+       fallback:
+               // see comment in ../bytes/bytes.go
+               if haveFastIndex {
+                       if j := bytealg.IndexString(s[i-last:], string(r)); j >= 0 {
+                               return i + j - last
+                       }
+               } else {
+                       c0 := rs[last]
+                       c1 := rs[last-1]
+               loop:
+                       for ; i < len(s); i++ {
+                               if s[i] == c0 && s[i-1] == c1 {
+                                       for k := 2; k < len(rs); k++ {
+                                               if s[i-k] != rs[last-k] {
+                                                       continue loop
+                                               }
+                                       }
+                                       return i - last
+                               }
+                       }
+               }
+               return -1
         }
  }
  
diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go

index acbf3ede7b4e8fca842aa28c8d455b9cfbd61158..39f5f4e35579fb531337beb8939c9ca4dc9b0d18 100644 (file)
--- a/src/strings/strings_test.go
+++ b/src/strings/strings_test.go
@@ -155,6 +155,11 @@ var indexTests = []IndexTest{
         // test fallback to Rabin-Karp.
         {"oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22},
         {"oxoxoxoxoxoxoxoxoxoxoxox", "oy", -1},
+       // test fallback to IndexRune
+       {"oxoxoxoxoxoxoxoxoxoxox☺", "☺", 22},
+       // invalid UTF-8 byte sequence (must be longer than bytealg.MaxBruteForce to
+       // test that we don't use IndexRune)
+       {"xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx\xed\x9f\xc0", "\xed\x9f\xc0", 105},
  }
  
  var lastIndexTests = []IndexTest{
@@ -326,6 +331,37 @@ func TestIndexRune(t *testing.T) {
                 {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", -1, -1},
                 {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", 0xD800, -1}, // Surrogate pair
                 {"a☺b☻c☹d\xe2\x98�\xff�\xed\xa0\x80", utf8.MaxRune + 1, -1},
+
+               // 2 bytes
+               {"ӆ", 'ӆ', 0},
+               {"a", 'ӆ', -1},
+               {"  ӆ", 'ӆ', 2},
+               {"  a", 'ӆ', -1},
+               {Repeat("ц", 64) + "ӆ", 'ӆ', 128}, // test cutover
+               {Repeat("Ꙁ", 64) + "Ꚁ", '䚀', -1},  // 'Ꚁ' and '䚀' share the same last two bytes
+
+               // 3 bytes
+               {"Ꚁ", 'Ꚁ', 0},
+               {"a", 'Ꚁ', -1},
+               {"  Ꚁ", 'Ꚁ', 2},
+               {"  a", 'Ꚁ', -1},
+               {Repeat("Ꙁ", 64) + "Ꚁ", 'Ꚁ', 192}, // test cutover
+               {Repeat("𡋀", 64) + "𡌀", '𣌀', -1},  // '𡌀' and '𣌀' share the same last two bytes
+
+               // 4 bytes
+               {"𡌀", '𡌀', 0},
+               {"a", '𡌀', -1},
+               {"  𡌀", '𡌀', 2},
+               {"  a", '𡌀', -1},
+               {Repeat("𡋀", 64) + "𡌀", '𡌀', 256}, // test cutover
+               {Repeat("𡋀", 64), '𡌀', -1},
+
+               // Test the cutover to bytealg.IndexString when it is triggered in
+               // the middle of a rune that contains consecutive runs of equal bytes.
+               {"aaaaaKKKK\U000bc104", '\U000bc104', 17}, // cutover: (n + 16) / 8
+               {"aaaaaKKKK鄄", '鄄', 17},
+               {"aaKKKKKa\U000bc104", '\U000bc104', 18}, // cutover: 4 + n>>4
+               {"aaKKKKKa鄄", '鄄', 18},
         }
         for _, tt := range tests {
                 if got := IndexRune(tt.in, tt.rune); got != tt.want {
@@ -333,13 +369,14 @@ func TestIndexRune(t *testing.T) {
                 }
         }
  
-       haystack := "test世界"
+       // Make sure we trigger the cutover and string(rune) conversion.
+       haystack := "test" + Repeat("𡋀", 32) + "𡌀"
         allocs := testing.AllocsPerRun(1000, func() {
                 if i := IndexRune(haystack, 's'); i != 2 {
                         t.Fatalf("'s' at %d; want 2", i)
                 }
-               if i := IndexRune(haystack, '世'); i != 4 {
-                       t.Fatalf("'世' at %d; want 4", i)
+               if i := IndexRune(haystack, '𡌀'); i != 132 {
+                       t.Fatalf("'𡌀' at %d; want 132", i)
                 }
         })
         if allocs != 0 && testing.CoverMode() == "" {
author	Charlie Vieth <charlie.vieth@gmail.com>
	Thu, 2 Nov 2023 04:18:59 +0000 (00:18 -0400)
committer	Gopher Robot <gobot@golang.org>
	Wed, 14 Aug 2024 18:25:29 +0000 (18:25 +0000)
src/bytes/bytes.go		patch \| blob \| history
src/bytes/bytes_test.go		patch \| blob \| history
src/strings/strings.go		patch \| blob \| history
src/strings/strings_test.go		patch \| blob \| history