// Valid reports whether p consists entirely of valid UTF-8-encoded runes.
func Valid(p []byte) bool {
+ // This optimization avoids the need to recompute the capacity
+ // when generating code for p[8:], bringing it to parity with
+ // ValidString, which was 20% faster on long ASCII strings.
+ p = p[:len(p):len(p)]
+
// Fast path. Check for and skip 8 bytes of ASCII characters per iteration.
for len(p) >= 8 {
// Combining two 32 bit loads allows the same code to be used
import (
"bytes"
+ "strings"
"testing"
"unicode"
. "unicode/utf8"
}
}
// ascii100000 is the ten ASCII digits "0123456789" repeated 10,000 times
// (100,000 bytes). It is the input for the 100K ASCII benchmarks.
+var ascii100000 = strings.Repeat("0123456789", 10000)
+
// BenchmarkValidTenASCIIChars measures Valid on a ten-byte, all-ASCII slice.
func BenchmarkValidTenASCIIChars(b *testing.B) {
	s := []byte("0123456789")
	for i := 0; i < b.N; i++ {
		// The call under test must sit inside the loop: an empty body times
		// nothing and leaves s unused, which does not compile.
		Valid(s)
	}
}
// BenchmarkValid100KASCIIChars measures Valid on a []byte copy of ascii100000.
+func BenchmarkValid100KASCIIChars(b *testing.B) {
// Convert once, before the loop, so the string-to-slice copy is not
// repeated on every iteration.
+ s := []byte(ascii100000)
+ for i := 0; i < b.N; i++ {
+ Valid(s)
+ }
+}
+
// BenchmarkValidTenJapaneseChars measures Valid on a short slice holding ten
// Japanese characters, none of which are ASCII.
func BenchmarkValidTenJapaneseChars(b *testing.B) {
	input := []byte("日本語日本語日本語日")
	// Count down from b.N; the iteration count equals that of the usual
	// 0..b.N-1 loop.
	for remaining := b.N; remaining > 0; remaining-- {
		Valid(input)
	}
}
// BenchmarkValidLongMostlyASCII measures Valid on a []byte copy of
// longStringMostlyASCII.
+func BenchmarkValidLongMostlyASCII(b *testing.B) {
// Convert once, before the loop, so the copy is not repeated per iteration.
+ longMostlyASCII := []byte(longStringMostlyASCII)
+ for i := 0; i < b.N; i++ {
+ Valid(longMostlyASCII)
+ }
+}
+
// BenchmarkValidLongJapanese measures Valid on a []byte copy of
// longStringJapanese.
+func BenchmarkValidLongJapanese(b *testing.B) {
// Convert once, before the loop, so the copy is not repeated per iteration.
+ longJapanese := []byte(longStringJapanese)
+ for i := 0; i < b.N; i++ {
+ Valid(longJapanese)
+ }
+}
// BenchmarkValidStringTenASCIIChars measures ValidString on a ten-byte,
// all-ASCII string.
func BenchmarkValidStringTenASCIIChars(b *testing.B) {
	for i := 0; i < b.N; i++ {
		// Same ten digits as the []byte variant of this benchmark; without
		// this call the loop body is empty and nothing is measured.
		ValidString("0123456789")
	}
}
// BenchmarkValidString100KASCIIChars measures ValidString on ascii100000.
// It is the string counterpart of BenchmarkValid100KASCIIChars.
+func BenchmarkValidString100KASCIIChars(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ ValidString(ascii100000)
+ }
+}
+
// BenchmarkValidStringTenJapaneseChars measures ValidString on a short string
// of ten Japanese characters, none of which are ASCII.
func BenchmarkValidStringTenJapaneseChars(b *testing.B) {
	const input = "日本語日本語日本語日"
	// Count down from b.N; the iteration count equals that of the usual
	// 0..b.N-1 loop.
	for remaining := b.N; remaining > 0; remaining-- {
		ValidString(input)
	}
}
// BenchmarkValidStringLongMostlyASCII measures ValidString on
// longStringMostlyASCII.
+func BenchmarkValidStringLongMostlyASCII(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ ValidString(longStringMostlyASCII)
+ }
+}
+
// BenchmarkValidStringLongJapanese measures ValidString on longStringJapanese.
+func BenchmarkValidStringLongJapanese(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ ValidString(longStringJapanese)
+ }
+}
+
// Inputs for the long-string benchmarks. Both are declared empty here and
// assigned in init.
+var longStringMostlyASCII string // ~100KB, ~97% ASCII
+var longStringJapanese string // ~100KB, non-ASCII
+
// init builds the two long benchmark inputs, longStringMostlyASCII and
// longStringJapanese.
+func init() {
+ const japanese = "日本語日本語日本語日"
+ var b bytes.Buffer
// Append ten-character chunks until at least 100,000 bytes are buffered.
// Every 100th chunk (i = 0, 100, 200, ...) is the Japanese text; the other
// 99 are the ASCII digits.
+ for i := 0; b.Len() < 100_000; i++ {
+ if i%100 == 0 {
+ b.WriteString(japanese)
+ } else {
+ b.WriteString("0123456789")
+ }
+ }
+ longStringMostlyASCII = b.String()
// Integer division: as many whole copies of japanese as fit in 100,000
// bytes, so the result never ends in a partial rune.
+ longStringJapanese = strings.Repeat(japanese, 100_000/len(japanese))
+}
+
func BenchmarkEncodeASCIIRune(b *testing.B) {
buf := make([]byte, UTFMax)
for i := 0; i < b.N; i++ {