s7 = 0x44 // accept 4, size 4
)
+const ( // the three bytes written for RuneError by the default cases of the encoders below
+ runeErrorByte0 = t3 | (RuneError >> 12) // first byte: same form as t3|byte(r>>12) in the three-byte case
+ runeErrorByte1 = tx | (RuneError>>6)&maskx // second byte: same form as tx|byte(r>>6)&maskx
+ runeErrorByte2 = tx | RuneError&maskx // third byte: same form as tx|byte(r)&maskx
+)
+
// first is information about the first byte in a UTF-8 sequence.
var first = [256]uint8{
// 1 2 3 4 5 6 7 8 9 A B C D E F
// If the rune is out of range, it writes the encoding of [RuneError].
// It returns the number of bytes written.
func EncodeRune(p []byte, r rune) int {
- // Negative values are erroneous. Making it unsigned addresses the problem.
- switch i := uint32(r); {
- case i <= rune1Max:
+ // This function is inlineable for fast handling of ASCII; every other rune is handed to encodeRuneNonASCII.
+ if uint32(r) <= rune1Max { // a negative r converts to a large uint32 and therefore fails this test
p[0] = byte(r)
return 1
+ }
+ return encodeRuneNonASCII(p, r) // two-, three-, four-byte and RuneError cases
+}
+
+func encodeRuneNonASCII(p []byte, r rune) int { // writes the encoding of r, or of RuneError, into p and returns the number of bytes written (2, 3, or 4)
+ // Negative values are erroneous. Making it unsigned addresses the problem.
+ switch i := uint32(r); { // cases are tested in order, so each one may rely on the earlier ones having failed
case i <= rune2Max:
_ = p[1] // eliminate bounds checks
p[0] = t2 | byte(r>>6)
p[1] = tx | byte(r)&maskx
return 2
- case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
- r = RuneError
- fallthrough
- case i <= rune3Max:
+ case i < surrogateMin, surrogateMax < i && i <= rune3Max: // three-byte values on either side of surrogateMin..surrogateMax; i <= rune2Max was handled above
_ = p[2] // eliminate bounds checks
p[0] = t3 | byte(r>>12)
p[1] = tx | byte(r>>6)&maskx
p[2] = tx | byte(r)&maskx
return 3
- default:
+ case i > rune3Max && i <= MaxRune: // four-byte values, up to and including MaxRune
_ = p[3] // eliminate bounds checks
p[0] = t4 | byte(r>>18)
p[1] = tx | byte(r>>12)&maskx
p[2] = tx | byte(r>>6)&maskx
p[3] = tx | byte(r)&maskx
return 4
+ default: // whatever is left: surrogateMin..surrogateMax, values above MaxRune, and negative r (large after the uint32 conversion)
+ _ = p[2] // eliminate bounds checks
+ p[0] = runeErrorByte0 // constant bytes; r itself is no longer rewritten to RuneError as the removed lines did
+ p[1] = runeErrorByte1
+ p[2] = runeErrorByte2
+ return 3
}
}
switch i := uint32(r); {
case i <= rune2Max:
return append(p, t2|byte(r>>6), tx|byte(r)&maskx)
- case i > MaxRune, surrogateMin <= i && i <= surrogateMax:
- r = RuneError
- fallthrough
- case i <= rune3Max:
+ case i < surrogateMin, surrogateMax < i && i <= rune3Max:
return append(p, t3|byte(r>>12), tx|byte(r>>6)&maskx, tx|byte(r)&maskx)
- default:
+ case i > rune3Max && i <= MaxRune:
return append(p, t4|byte(r>>18), tx|byte(r>>12)&maskx, tx|byte(r>>6)&maskx, tx|byte(r)&maskx)
+ default:
+ return append(p, runeErrorByte0, runeErrorByte1, runeErrorByte2)
}
}
func BenchmarkEncodeASCIIRune(b *testing.B) {
buf := make([]byte, UTFMax)
for i := 0; i < b.N; i++ {
- EncodeRune(buf, 'a')
+ EncodeRune(buf, 'a') // 1 byte; NOTE(review): the result is discarded, confirm the compiler does not drop the inlined store
+ }
+}
+
+func BenchmarkEncodeSpanishRune(b *testing.B) {
+ buf := make([]byte, UTFMax)
+ for i := 0; i < b.N; i++ {
+ EncodeRune(buf, 'Ñ') // 2 bytes: measures the two-byte case
}
}
func BenchmarkEncodeJapaneseRune(b *testing.B) {
buf := make([]byte, UTFMax)
for i := 0; i < b.N; i++ {
- EncodeRune(buf, '本')
+ EncodeRune(buf, '本') // 3 bytes: measures the three-byte case
+ }
+}
+
+func BenchmarkEncodeMaxRune(b *testing.B) {
+ buf := make([]byte, UTFMax)
+ for i := 0; i < b.N; i++ {
+ EncodeRune(buf, MaxRune) // 4 bytes: the largest value the four-byte case accepts
+ }
+}
+
+func BenchmarkEncodeInvalidRuneMaxPlusOne(b *testing.B) {
+ buf := make([]byte, UTFMax)
+ for i := 0; i < b.N; i++ {
+ EncodeRune(buf, MaxRune+1) // 3 bytes: RuneError, because the value is above MaxRune
+ }
+}
+
+func BenchmarkEncodeInvalidRuneSurrogate(b *testing.B) {
+ buf := make([]byte, UTFMax)
+ for i := 0; i < b.N; i++ {
+ EncodeRune(buf, 0xD800) // 3 bytes: RuneError, a surrogate value (per the benchmark name)
+ }
+}
+
+func BenchmarkEncodeInvalidRuneNegative(b *testing.B) {
+ buf := make([]byte, UTFMax)
+ for i := 0; i < b.N; i++ {
+ EncodeRune(buf, -1) // 3 bytes: RuneError, because a negative rune is large once converted to uint32
}
}
func BenchmarkAppendASCIIRune(b *testing.B) {
buf := make([]byte, UTFMax)
for i := 0; i < b.N; i++ {
- AppendRune(buf[:0], 'a')
+ AppendRune(buf[:0], 'a') // 1 byte; buf[:0] has length 0 but keeps buf's capacity, so the append can reuse buf's storage
+ }
+}
+
+func BenchmarkAppendSpanishRune(b *testing.B) {
+ buf := make([]byte, UTFMax)
+ for i := 0; i < b.N; i++ {
+ AppendRune(buf[:0], 'Ñ') // 2 bytes: measures the two-byte case
}
}
func BenchmarkAppendJapaneseRune(b *testing.B) {
buf := make([]byte, UTFMax)
for i := 0; i < b.N; i++ {
- AppendRune(buf[:0], '本')
+ AppendRune(buf[:0], '本') // 3 bytes: measures the three-byte case
+ }
+}
+
+func BenchmarkAppendMaxRune(b *testing.B) {
+ buf := make([]byte, UTFMax)
+ for i := 0; i < b.N; i++ {
+ AppendRune(buf[:0], MaxRune) // 4 bytes: the largest valid rune
+ }
+}
+
+func BenchmarkAppendInvalidRuneMaxPlusOne(b *testing.B) {
+ buf := make([]byte, UTFMax)
+ for i := 0; i < b.N; i++ {
+ AppendRune(buf[:0], MaxRune+1) // 3 bytes: RuneError, because the value is above MaxRune
+ }
+}
+
+func BenchmarkAppendInvalidRuneSurrogate(b *testing.B) {
+ buf := make([]byte, UTFMax)
+ for i := 0; i < b.N; i++ {
+ AppendRune(buf[:0], 0xD800) // 3 bytes: RuneError, a surrogate value (per the benchmark name)
+ }
+}
+
+func BenchmarkAppendInvalidRuneNegative(b *testing.B) {
+ buf := make([]byte, UTFMax)
+ for i := 0; i < b.N; i++ {
+ AppendRune(buf[:0], -1) // 3 bytes: RuneError, because the rune is negative
}
}