unicode/utf16: add AppendRune

author qmuntal <quimmuntal@gmail.com>

Fri, 27 May 2022 13:44:55 +0000 (15:44 +0200)

committer Gopher Robot <gobot@golang.org>

Fri, 19 Aug 2022 16:45:08 +0000 (16:45 +0000)
author qmuntal <quimmuntal@gmail.com>
Fri, 27 May 2022 13:44:55 +0000 (15:44 +0200)
committer Gopher Robot <gobot@golang.org>
Fri, 19 Aug 2022 16:45:08 +0000 (16:45 +0000)
diff --git a/api/next/51896.txt b/api/next/51896.txt

new file mode 100644 (file)

index 0000000..d4ef14c
--- /dev/null
+++ b/api/next/51896.txt
@@ -0,0 +1 @@
+pkg unicode/utf16, func AppendRune([]uint16, int32) []uint16 #51896
\ No newline at end of file
diff --git a/src/unicode/utf16/utf16.go b/src/unicode/utf16/utf16.go

index 1a881aa769591e45e60219d6f0bbfbefffc0e069..38d8be6060293ee01986cc4bf01a2d0eff201a44 100644 (file)
--- a/src/unicode/utf16/utf16.go
+++ b/src/unicode/utf16/utf16.go
@@ -83,6 +83,23 @@ func Encode(s []rune) []uint16 {
         return a[:n]
  }
  
+// AppendRune appends the UTF-16 encoding of the Unicode code point r
+// to the end of a and returns the extended buffer. If the rune is not
+// a valid Unicode code point, it appends the encoding of U+FFFD.
+func AppendRune(a []uint16, r rune) []uint16 {
+       // This function is inlineable for fast handling of ASCII.
+       switch {
+       case 0 <= r && r < surr1, surr3 <= r && r < surrSelf:
+               // normal rune
+               return append(a, uint16(r))
+       case surrSelf <= r && r <= maxRune:
+               // needs surrogate sequence
+               r1, r2 := EncodeRune(r)
+               return append(a, uint16(r1), uint16(r2))
+       }
+       return append(a, replacementChar)
+}
+
  // Decode returns the Unicode code point sequence represented
  // by the UTF-16 encoding s.
  func Decode(s []uint16) []rune {
diff --git a/src/unicode/utf16/utf16_test.go b/src/unicode/utf16/utf16_test.go

index 4ecaabef96c59f37c14929d285a4147470724804..be339b1fdf137b54a7097a4295c8a65caefce959 100644 (file)
--- a/src/unicode/utf16/utf16_test.go
+++ b/src/unicode/utf16/utf16_test.go
@@ -43,6 +43,18 @@ func TestEncode(t *testing.T) {
         }
  }
  
+func TestAppendRune(t *testing.T) {
+       for _, tt := range encodeTests {
+               var out []uint16
+               for _, u := range tt.in {
+                       out = AppendRune(out, u)
+               }
+               if !reflect.DeepEqual(out, tt.out) {
+                       t.Errorf("AppendRune(%x) = %x; want %x", tt.in, out, tt.out)
+               }
+       }
+}
+
  func TestEncodeRune(t *testing.T) {
         for i, tt := range encodeTests {
                 j := 0
@@ -193,6 +205,28 @@ func BenchmarkEncodeValidJapaneseChars(b *testing.B) {
         }
  }
  
+func BenchmarkAppendRuneValidASCII(b *testing.B) {
+       data := []rune{'h', 'e', 'l', 'l', 'o'}
+       a := make([]uint16, 0, len(data)*2)
+       for i := 0; i < b.N; i++ {
+               for _, u := range data {
+                       a = AppendRune(a, u)
+               }
+               a = a[:0]
+       }
+}
+
+func BenchmarkAppendRuneValidJapaneseChars(b *testing.B) {
+       data := []rune{'日', '本', '語'}
+       a := make([]uint16, 0, len(data)*2)
+       for i := 0; i < b.N; i++ {
+               for _, u := range data {
+                       a = AppendRune(a, u)
+               }
+               a = a[:0]
+       }
+}
+
  func BenchmarkEncodeRune(b *testing.B) {
         for i := 0; i < b.N; i++ {
                 for _, u := range []rune{'𝓐', '𝓑', '𝓒', '𝓓', '𝓔'} {
author	qmuntal <quimmuntal@gmail.com>
	Fri, 27 May 2022 13:44:55 +0000 (15:44 +0200)
committer	Gopher Robot <gobot@golang.org>
	Fri, 19 Aug 2022 16:45:08 +0000 (16:45 +0000)
api/next/51896.txt	[new file with mode: 0644]	patch \| blob
src/unicode/utf16/utf16.go		patch \| blob \| history
src/unicode/utf16/utf16_test.go		patch \| blob \| history