]> Cypherpunks repositories - gostls13.git/commitdiff
utf16: add DecodeRune, EncodeRune
authorRuss Cox <rsc@golang.org>
Wed, 21 Apr 2010 23:27:18 +0000 (16:27 -0700)
committerRuss Cox <rsc@golang.org>
Wed, 21 Apr 2010 23:27:18 +0000 (16:27 -0700)
R=r
CC=golang-dev
https://golang.org/cl/970041

src/pkg/utf16/utf16.go
src/pkg/utf16/utf16_test.go

index 303162452680c3e45a0b109e32ce154ae7dee60c..372e38a7181e8c16ae6e19bf60cc8b376c7d5961 100644 (file)
@@ -18,6 +18,33 @@ const (
        surrSelf = 0x10000
 )
 
+// IsSurrogate returns true if the specified Unicode code point
+// can appear in a surrogate pair.
+func IsSurrogate(rune int) bool {
+       return surr1 <= rune && rune < surr3
+}
+
+// DecodeRune returns the UTF-16 decoding of a surrogate pair.
+// If the pair is not a valid UTF-16 surrogate pair, DecodeRune returns
+// the Unicode replacement code point U+FFFD.
+func DecodeRune(r1, r2 int) int {
+       if surr1 <= r1 && r1 < surr2 && surr2 <= r2 && r2 < surr3 {
+               return (int(r1)-surr1)<<10 | (int(r2) - surr2) + 0x10000
+       }
+       return unicode.ReplacementChar
+}
+
+// EncodeRune returns the UTF-16 surrogate pair r1, r2 for the given rune.
+// If the rune is not a valid Unicode code point or does not need encoding,
+// EncodeRune returns U+FFFD, U+FFFD.
+func EncodeRune(rune int) (r1, r2 int) {
+       if rune < surrSelf || rune > unicode.MaxRune || IsSurrogate(rune) {
+               return unicode.ReplacementChar, unicode.ReplacementChar
+       }
+       rune -= surrSelf
+       return surr1 + (rune>>10)&0x3ff, surr2 + rune&0x3ff
+}
+
 // Encode returns the UTF-16 encoding of the Unicode code point sequence s.
 func Encode(s []int) []uint16 {
        n := len(s)
@@ -38,9 +65,9 @@ func Encode(s []int) []uint16 {
                        a[n] = uint16(v)
                        n++
                default:
-                       v -= surrSelf
-                       a[n] = uint16(surr1 + (v>>10)&0x3ff)
-                       a[n+1] = uint16(surr2 + v&0x3ff)
+                       r1, r2 := EncodeRune(v)
+                       a[n] = uint16(r1)
+                       a[n+1] = uint16(r2)
                        n += 2
                }
        }
@@ -57,7 +84,7 @@ func Decode(s []uint16) []int {
                case surr1 <= r && r < surr2 && i+1 < len(s) &&
                        surr2 <= s[i+1] && s[i+1] < surr3:
                        // valid surrogate sequence
-                       a[n] = (int(r)-surr1)<<10 | (int(s[i+1]) - surr2) + 0x10000
+                       a[n] = DecodeRune(int(r), int(s[i+1]))
                        i++
                        n++
                case surr1 <= r && r < surr3:
index c6e269aad059dea058554b9a1dab3d693784a28f..c0848aa387289e19ecd3bb3980f72d35c922d011 100644 (file)
@@ -8,6 +8,7 @@ import (
        "fmt"
        "reflect"
        "testing"
+       "unicode"
 )
 
 type encodeTest struct {
@@ -32,6 +33,41 @@ func TestEncode(t *testing.T) {
        }
 }
 
+func TestEncodeRune(t *testing.T) {
+       for i, tt := range encodeTests {
+               j := 0
+               for _, r := range tt.in {
+                       r1, r2 := EncodeRune(r)
+                       if r < 0x10000 || r > unicode.MaxRune {
+                               if j >= len(tt.out) {
+                                       t.Errorf("#%d: ran out of tt.out", i)
+                                       break
+                               }
+                               if r1 != unicode.ReplacementChar || r2 != unicode.ReplacementChar {
+                                       t.Errorf("EncodeRune(%#x) = %#x, %#x; want 0xfffd, 0xfffd", r, r1, r2)
+                               }
+                               j++
+                       } else {
+                               if j+1 >= len(tt.out) {
+                                       t.Errorf("#%d: ran out of tt.out", i)
+                                       break
+                               }
+                               if r1 != int(tt.out[j]) || r2 != int(tt.out[j+1]) {
+                                       t.Errorf("EncodeRune(%#x) = %#x, %#x; want %#x, %#x", r, r1, r2, tt.out[j], tt.out[j+1])
+                               }
+                               j += 2
+                               dec := DecodeRune(r1, r2)
+                               if dec != r {
+                                       t.Errorf("DecodeRune(%#x, %#x) = %#x; want %#x", r1, r2, dec, r)
+                               }
+                       }
+               }
+               if j != len(tt.out) {
+                       t.Errorf("#%d: EncodeRune didn't generate enough output", i)
+               }
+       }
+}
+
 type decodeTest struct {
        in  []uint16
        out []int