// TrimSpace returns a subslice of s by slicing off all leading and
// trailing white space, as defined by Unicode.
func TrimSpace(s []byte) []byte {
- return TrimFunc(s, unicode.IsSpace)
+ // Fast path for ASCII: look for the first ASCII non-space byte
+ start := 0
+ for ; start < len(s); start++ {
+ c := s[start]
+ if c >= utf8.RuneSelf {
+ // If we run into a non-ASCII byte, fall back to the
+ // slower unicode-aware method on the remaining bytes
+ return TrimFunc(s[start:], unicode.IsSpace)
+ }
+ if asciiSpace[c] == 0 {
+ break
+ }
+ }
+
+ // Now look for the first ASCII non-space byte from the end
+ stop := len(s)
+ for ; stop > start; stop-- {
+ c := s[stop-1]
+ if c >= utf8.RuneSelf {
+ return TrimFunc(s[start:stop], unicode.IsSpace)
+ }
+ if asciiSpace[c] == 0 {
+ break
+ }
+ }
+
+ // At this point s[start:stop] starts and ends with an ASCII
+ // non-space bytes, so we're done. Non-ASCII cases have already
+ // been handled above.
+ return s[start:stop]
}
// Runes interprets s as a sequence of UTF-8-encoded code points.
}
func BenchmarkTrimSpace(b *testing.B) {
- s := []byte(" Some text. \n")
- for i := 0; i < b.N; i++ {
- TrimSpace(s)
+ tests := []struct {
+ name string
+ input []byte
+ }{
+ {"NoTrim", []byte("typical")},
+ {"ASCII", []byte(" foo bar ")},
+ {"SomeNonASCII", []byte(" \u2000\t\r\n x\t\t\r\r\ny\n \u3000 ")},
+ {"JustNonASCII", []byte("\u2000\u2000\u2000☺☺☺☺\u3000\u3000\u3000")},
+ }
+ for _, test := range tests {
+ b.Run(test.name, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ TrimSpace(test.input)
+ }
+ })
}
}
// TrimSpace returns a slice of the string s, with all leading
// and trailing white space removed, as defined by Unicode.
func TrimSpace(s string) string {
- return TrimFunc(s, unicode.IsSpace)
+ // Fast path for ASCII: look for the first ASCII non-space byte
+ start := 0
+ for ; start < len(s); start++ {
+ c := s[start]
+ if c >= utf8.RuneSelf {
+ // If we run into a non-ASCII byte, fall back to the
+ // slower unicode-aware method on the remaining bytes
+ return TrimFunc(s[start:], unicode.IsSpace)
+ }
+ if asciiSpace[c] == 0 {
+ break
+ }
+ }
+
+ // Now look for the first ASCII non-space byte from the end
+ stop := len(s)
+ for ; stop > start; stop-- {
+ c := s[stop-1]
+ if c >= utf8.RuneSelf {
+ return TrimFunc(s[start:stop], unicode.IsSpace)
+ }
+ if asciiSpace[c] == 0 {
+ break
+ }
+ }
+
+ // At this point s[start:stop] starts and ends with an ASCII
+ // non-space bytes, so we're done. Non-ASCII cases have already
+ // been handled above.
+ return s[start:stop]
}
// TrimPrefix returns s without the provided leading prefix string.
})
}
}
+
+func BenchmarkTrimSpace(b *testing.B) {
+ tests := []struct{ name, input string }{
+ {"NoTrim", "typical"},
+ {"ASCII", " foo bar "},
+ {"SomeNonASCII", " \u2000\t\r\n x\t\t\r\r\ny\n \u3000 "},
+ {"JustNonASCII", "\u2000\u2000\u2000☺☺☺☺\u3000\u3000\u3000"},
+ }
+ for _, test := range tests {
+ b.Run(test.name, func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ TrimSpace(test.input)
+ }
+ })
+ }
+}