bytes, strings: optimize Trim for single byte cutsets

author Joe Tsai <joetsai@digital-static.net>

Sun, 30 May 2021 02:11:37 +0000 (19:11 -0700)

committer Joe Tsai <joetsai@digital-static.net>

Wed, 25 Aug 2021 19:29:15 +0000 (19:29 +0000)
author Joe Tsai <joetsai@digital-static.net>
Sun, 30 May 2021 02:11:37 +0000 (19:11 -0700)
committer Joe Tsai <joetsai@digital-static.net>
Wed, 25 Aug 2021 19:29:15 +0000 (19:29 +0000)
diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go

index ce52649f132bb9575beacdca5af912f97478bb32..cd859d086db09228f201ae3a32cba7cb080f9738 100644 (file)
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -888,11 +888,6 @@ func (as *asciiSet) contains(c byte) bool {
  }
  
  func makeCutsetFunc(cutset string) func(r rune) bool {
-       if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
-               return func(r rune) bool {
-                       return r == rune(cutset[0])
-               }
-       }
         if as, isASCII := makeASCIISet(cutset); isASCII {
                 return func(r rune) bool {
                         return r < utf8.RuneSelf && as.contains(byte(r))
@@ -911,21 +906,44 @@ func makeCutsetFunc(cutset string) func(r rune) bool {
  // Trim returns a subslice of s by slicing off all leading and
  // trailing UTF-8-encoded code points contained in cutset.
  func Trim(s []byte, cutset string) []byte {
+       if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { // fast path: cutset is exactly one byte below utf8.RuneSelf
+               return trimLeftByte(trimRightByte(s, cutset[0]), cutset[0]) // strip that byte from the right end first, then from the left
+       }
         return TrimFunc(s, makeCutsetFunc(cutset))
  }
  
  // TrimLeft returns a subslice of s by slicing off all leading
  // UTF-8-encoded code points contained in cutset.
  func TrimLeft(s []byte, cutset string) []byte {
+       if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { // fast path: cutset is exactly one byte below utf8.RuneSelf
+               return trimLeftByte(s, cutset[0]) // plain byte comparison instead of building a cutset predicate
+       }
         return TrimLeftFunc(s, makeCutsetFunc(cutset))
  }
  
+func trimLeftByte(s []byte, c byte) []byte { // trimLeftByte returns s with every leading byte equal to c sliced off
+       for len(s) > 0 && s[0] == c { // stop at the first byte that differs from c, or once s is empty
+               s = s[1:] // reslice past the matching byte
+       }
+       return s
+}
+
  // TrimRight returns a subslice of s by slicing off all trailing
  // UTF-8-encoded code points that are contained in cutset.
  func TrimRight(s []byte, cutset string) []byte {
+       if len(cutset) == 1 && cutset[0] < utf8.RuneSelf { // fast path: cutset is exactly one byte below utf8.RuneSelf
+               return trimRightByte(s, cutset[0]) // plain byte comparison instead of building a cutset predicate
+       }
         return TrimRightFunc(s, makeCutsetFunc(cutset))
  }
  
+func trimRightByte(s []byte, c byte) []byte { // trimRightByte returns s with every trailing byte equal to c sliced off
+       for len(s) > 0 && s[len(s)-1] == c { // stop at the last byte that differs from c, or once s is empty
+               s = s[:len(s)-1] // reslice to drop the matching final byte
+       }
+       return s
+}
+
  // TrimSpace returns a subslice of s by slicing off all leading and
  // trailing white space, as defined by Unicode.
  func TrimSpace(s []byte) []byte {
diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go

index 544ee46f908860dc969170c26d8f3ad472394eee..850b2ed061d8b1540a2d166c30b073be6dfac4bc 100644 (file)
--- a/src/bytes/bytes_test.go
+++ b/src/bytes/bytes_test.go
@@ -1251,7 +1251,9 @@ var trimTests = []TrimTest{
         {"TrimLeft", "abba", "ab", ""},
         {"TrimRight", "abba", "ab", ""},
         {"TrimLeft", "abba", "a", "bba"},
+       {"TrimLeft", "abba", "b", "abba"},
         {"TrimRight", "abba", "a", "abb"},
+       {"TrimRight", "abba", "b", "abba"},
         {"Trim", "<tag>", "<>", "tag"},
         {"Trim", "* listitem", " *", "listitem"},
         {"Trim", `"quote"`, `"`, "quote"},
@@ -1963,6 +1965,13 @@ func BenchmarkTrimASCII(b *testing.B) {
         }
  }
  
+func BenchmarkTrimByte(b *testing.B) { // BenchmarkTrimByte times Trim on a []byte with the one-byte cutset " "
+       x := []byte("  the quick brown fox   ") // input carries spaces on both ends, so both sides get trimmed
+       for i := 0; i < b.N; i++ {
+               Trim(x, " ") // result is discarded; only the call is being timed
+       }
+}
+
  func BenchmarkIndexPeriodic(b *testing.B) {
         key := []byte{1, 1}
         for _, skip := range [...]int{2, 4, 8, 16, 32, 64} {
diff --git a/src/strings/strings.go b/src/strings/strings.go

index b429735feadf9d26d29c22d52440f9eb5a749d7f..0df8d2eb281dc45658fc0ebfcc10201e83c19ef4 100644 (file)
--- a/src/strings/strings.go
+++ b/src/strings/strings.go
@@ -818,11 +818,6 @@ func (as *asciiSet) contains(c byte) bool {
  }
  
  func makeCutsetFunc(cutset string) func(rune) bool {
-       if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
-               return func(r rune) bool {
-                       return r == rune(cutset[0])
-               }
-       }
         if as, isASCII := makeASCIISet(cutset); isASCII {
                 return func(r rune) bool {
                         return r < utf8.RuneSelf && as.contains(byte(r))
@@ -837,6 +832,9 @@ func Trim(s, cutset string) string {
         if s == "" || cutset == "" {
                 return s
         }
+       if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
+               return trimLeftByte(trimRightByte(s, cutset[0]), cutset[0])
+       }
         return TrimFunc(s, makeCutsetFunc(cutset))
  }
  
@@ -848,9 +846,19 @@ func TrimLeft(s, cutset string) string {
         if s == "" || cutset == "" {
                 return s
         }
+       if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
+               return trimLeftByte(s, cutset[0])
+       }
         return TrimLeftFunc(s, makeCutsetFunc(cutset))
  }
  
+func trimLeftByte(s string, c byte) string { // trimLeftByte returns s with every leading byte equal to c sliced off
+       for len(s) > 0 && s[0] == c { // stop at the first byte that differs from c, or once s is empty
+               s = s[1:] // reslice past the matching byte
+       }
+       return s
+}
+
  // TrimRight returns a slice of the string s, with all trailing
  // Unicode code points contained in cutset removed.
  //
@@ -859,9 +867,19 @@ func TrimRight(s, cutset string) string {
         if s == "" || cutset == "" {
                 return s
         }
+       if len(cutset) == 1 && cutset[0] < utf8.RuneSelf {
+               return trimRightByte(s, cutset[0])
+       }
         return TrimRightFunc(s, makeCutsetFunc(cutset))
  }
  
+func trimRightByte(s string, c byte) string { // trimRightByte returns s with every trailing byte equal to c sliced off
+       for len(s) > 0 && s[len(s)-1] == c { // stop at the last byte that differs from c, or once s is empty
+               s = s[:len(s)-1] // reslice to drop the matching final byte
+       }
+       return s
+}
+
  // TrimSpace returns a slice of the string s, with all leading
  // and trailing white space removed, as defined by Unicode.
  func TrimSpace(s string) string {
diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go

index 09e5b27cc3857a6a98a0ef0b19cd05f1e9d536da..edc6c205907d3ab0e64f51a36b1499208564414f 100644 (file)
--- a/src/strings/strings_test.go
+++ b/src/strings/strings_test.go
@@ -808,7 +808,9 @@ var trimTests = []struct {
         {"TrimLeft", "abba", "ab", ""},
         {"TrimRight", "abba", "ab", ""},
         {"TrimLeft", "abba", "a", "bba"},
+       {"TrimLeft", "abba", "b", "abba"},
         {"TrimRight", "abba", "a", "abb"},
+       {"TrimRight", "abba", "b", "abba"},
         {"Trim", "<tag>", "<>", "tag"},
         {"Trim", "* listitem", " *", "listitem"},
         {"Trim", `"quote"`, `"`, "quote"},
@@ -1860,6 +1862,13 @@ func BenchmarkTrimASCII(b *testing.B) {
         }
  }
  
+func BenchmarkTrimByte(b *testing.B) { // BenchmarkTrimByte times Trim on a string with the one-byte cutset " "
+       x := "  the quick brown fox   " // input carries spaces on both ends, so both sides get trimmed
+       for i := 0; i < b.N; i++ {
+               Trim(x, " ") // result is discarded; only the call is being timed
+       }
+}
+
  func BenchmarkIndexPeriodic(b *testing.B) {
         key := "aa"
         for _, skip := range [...]int{2, 4, 8, 16, 32, 64} {
author	Joe Tsai <joetsai@digital-static.net>
	Sun, 30 May 2021 02:11:37 +0000 (19:11 -0700)
committer	Joe Tsai <joetsai@digital-static.net>
	Wed, 25 Aug 2021 19:29:15 +0000 (19:29 +0000)
src/bytes/bytes.go		patch \| blob \| history
src/bytes/bytes_test.go		patch \| blob \| history
src/strings/strings.go		patch \| blob \| history
src/strings/strings_test.go		patch \| blob \| history