From: qmuntal Date: Fri, 27 May 2022 13:44:55 +0000 (+0200) Subject: unicode/utf16: add AppendRune X-Git-Tag: go1.20rc1~1516 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=48297f1fb15206cd13e0f31647e909292fb21975;p=gostls13.git unicode/utf16: add AppendRune AppendRune appends the UTF-16 encoding of a rune to a []uint16. BenchmarkEncodeValidASCII-12 24.61ns 16B 1allocs BenchmarkEncodeValidJapaneseChars-12 18.79ns 8B 1allocs BenchmarkAppendRuneValidASCII-12 6.826ns 0B 0allocs BenchmarkAppendRuneValidJapaneseChars-12 3.547ns 0B 0allocs The ASCII case is written to be inlineable. Fixes #51896 Change-Id: I593b1029f603297ef6e80e036f2fee2a0938d38d Reviewed-on: https://go-review.googlesource.com/c/go/+/409054 TryBot-Result: Gopher Robot Reviewed-by: Ian Lance Taylor Run-TryBot: Ian Lance Taylor Auto-Submit: Ian Lance Taylor Reviewed-by: Joedian Reid --- diff --git a/api/next/51896.txt b/api/next/51896.txt new file mode 100644 index 0000000000..d4ef14cfa2 --- /dev/null +++ b/api/next/51896.txt @@ -0,0 +1 @@ +pkg unicode/utf16, func AppendRune([]uint16, int32) []uint16 #51896 \ No newline at end of file diff --git a/src/unicode/utf16/utf16.go b/src/unicode/utf16/utf16.go index 1a881aa769..38d8be6060 100644 --- a/src/unicode/utf16/utf16.go +++ b/src/unicode/utf16/utf16.go @@ -83,6 +83,23 @@ func Encode(s []rune) []uint16 { return a[:n] } +// AppendRune appends the UTF-16 encoding of the Unicode code point r +// to the end of p and returns the extended buffer. If the rune is not +// a valid Unicode code point, it appends the encoding of U+FFFD. +func AppendRune(a []uint16, r rune) []uint16 { + // This function is inlineable for fast handling of ASCII. + switch { + case 0 <= r && r < surr1, surr3 <= r && r < surrSelf: + // normal rune + return append(a, uint16(r)) + case surrSelf <= r && r <= maxRune: + // needs surrogate sequence + r1, r2 := EncodeRune(r) + return append(a, uint16(r1), uint16(r2)) + } + return append(a, replacementChar) +} + // Decode returns the Unicode code point sequence represented // by the UTF-16 encoding s. func Decode(s []uint16) []rune { diff --git a/src/unicode/utf16/utf16_test.go b/src/unicode/utf16/utf16_test.go index 4ecaabef96..be339b1fdf 100644 --- a/src/unicode/utf16/utf16_test.go +++ b/src/unicode/utf16/utf16_test.go @@ -43,6 +43,18 @@ func TestEncode(t *testing.T) { } } +func TestAppendRune(t *testing.T) { + for _, tt := range encodeTests { + var out []uint16 + for _, u := range tt.in { + out = AppendRune(out, u) + } + if !reflect.DeepEqual(out, tt.out) { + t.Errorf("AppendRune(%x) = %x; want %x", tt.in, out, tt.out) + } + } +} + func TestEncodeRune(t *testing.T) { for i, tt := range encodeTests { j := 0 @@ -193,6 +205,28 @@ func BenchmarkEncodeValidJapaneseChars(b *testing.B) { } } +func BenchmarkAppendRuneValidASCII(b *testing.B) { + data := []rune{'h', 'e', 'l', 'l', 'o'} + a := make([]uint16, 0, len(data)*2) + for i := 0; i < b.N; i++ { + for _, u := range data { + a = AppendRune(a, u) + } + a = a[:0] + } +} + +func BenchmarkAppendRuneValidJapaneseChars(b *testing.B) { + data := []rune{'日', '本', '語'} + a := make([]uint16, 0, len(data)*2) + for i := 0; i < b.N; i++ { + for _, u := range data { + a = AppendRune(a, u) + } + a = a[:0] + } +} + func BenchmarkEncodeRune(b *testing.B) { for i := 0; i < b.N; i++ { for _, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} {