]> Cypherpunks repositories - gostls13.git/commitdiff
utf16: new package
authorRuss Cox <rsc@golang.org>
Wed, 17 Mar 2010 01:44:37 +0000 (18:44 -0700)
committerRuss Cox <rsc@golang.org>
Wed, 17 Mar 2010 01:44:37 +0000 (18:44 -0700)
needed for interacting with various legacy interfaces,
like Windows and the Mac OS clipboard.

R=r
CC=golang-dev
https://golang.org/cl/595041

src/pkg/Makefile
src/pkg/utf16/Makefile [new file with mode: 0644]
src/pkg/utf16/utf16.go [new file with mode: 0644]
src/pkg/utf16/utf16_test.go [new file with mode: 0644]

index d3f0906cf6b3d4637c472bcda112c3ba7c7f5246..0807d6f937692b1f37489d672dda32a85aa19791 100644 (file)
@@ -117,6 +117,7 @@ DIRS=\
        testing/script\
        time\
        unicode\
+       utf16\
        utf8\
        websocket\
        xgb\
diff --git a/src/pkg/utf16/Makefile b/src/pkg/utf16/Makefile
new file mode 100644 (file)
index 0000000..29e4005
--- /dev/null
@@ -0,0 +1,11 @@
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include ../../Make.$(GOARCH)
+
+TARG=utf16
+GOFILES=\
+       utf16.go\
+
+include ../../Make.pkg
diff --git a/src/pkg/utf16/utf16.go b/src/pkg/utf16/utf16.go
new file mode 100644 (file)
index 0000000..3031624
--- /dev/null
@@ -0,0 +1,74 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package utf16 implements encoding and decoding of UTF-16 sequences.
+package utf16
+
+import "unicode"
+
+const (
+       // 0xd800-0xdc00 encodes the high 10 bits of a pair.
+       // 0xdc00-0xe000 encodes the low 10 bits of a pair.
+       // the value is those 20 bits plus 0x10000.
+       surr1 = 0xd800
+       surr2 = 0xdc00
+       surr3 = 0xe000
+
+       surrSelf = 0x10000
+)
+
+// Encode returns the UTF-16 encoding of the Unicode code point sequence s.
+func Encode(s []int) []uint16 {
+       n := len(s)
+       for _, v := range s {
+               if v >= surrSelf {
+                       n++
+               }
+       }
+
+       a := make([]uint16, n)
+       n = 0
+       for _, v := range s {
+               switch {
+               case v < 0, surr1 <= v && v < surr3, v > unicode.MaxRune:
+                       v = unicode.ReplacementChar
+                       fallthrough
+               case v < surrSelf:
+                       a[n] = uint16(v)
+                       n++
+               default:
+                       v -= surrSelf
+                       a[n] = uint16(surr1 + (v>>10)&0x3ff)
+                       a[n+1] = uint16(surr2 + v&0x3ff)
+                       n += 2
+               }
+       }
+       return a[0:n]
+}
+
+// Decode returns the Unicode code point sequence represented
+// by the UTF-16 encoding s.
+func Decode(s []uint16) []int {
+       a := make([]int, len(s))
+       n := 0
+       for i := 0; i < len(s); i++ {
+               switch r := s[i]; {
+               case surr1 <= r && r < surr2 && i+1 < len(s) &&
+                       surr2 <= s[i+1] && s[i+1] < surr3:
+                       // valid surrogate sequence
+                       a[n] = (int(r)-surr1)<<10 | (int(s[i+1]) - surr2) + 0x10000
+                       i++
+                       n++
+               case surr1 <= r && r < surr3:
+                       // invalid surrogate sequence
+                       a[n] = unicode.ReplacementChar
+                       n++
+               default:
+                       // normal rune
+                       a[n] = int(r)
+                       n++
+               }
+       }
+       return a[0:n]
+}
diff --git a/src/pkg/utf16/utf16_test.go b/src/pkg/utf16/utf16_test.go
new file mode 100644 (file)
index 0000000..c6e269a
--- /dev/null
@@ -0,0 +1,81 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package utf16
+
+import (
+       "fmt"
+       "reflect"
+       "testing"
+)
+
+type encodeTest struct {
+       in  []int
+       out []uint16
+}
+
+var encodeTests = []encodeTest{
+       encodeTest{[]int{1, 2, 3, 4}, []uint16{1, 2, 3, 4}},
+       encodeTest{[]int{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff},
+               []uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff}},
+       encodeTest{[]int{'a', 'b', 0xd7ff, 0xd800, 0xdfff, 0xe000, 0x110000, -1},
+               []uint16{'a', 'b', 0xd7ff, 0xfffd, 0xfffd, 0xe000, 0xfffd, 0xfffd}},
+}
+
+func TestEncode(t *testing.T) {
+       for _, tt := range encodeTests {
+               out := Encode(tt.in)
+               if !reflect.DeepEqual(out, tt.out) {
+                       t.Errorf("Encode(%v) = %v; want %v", hex(tt.in), hex16(out), hex16(tt.out))
+               }
+       }
+}
+
+type decodeTest struct {
+       in  []uint16
+       out []int
+}
+
+var decodeTests = []decodeTest{
+       decodeTest{[]uint16{1, 2, 3, 4}, []int{1, 2, 3, 4}},
+       decodeTest{[]uint16{0xffff, 0xd800, 0xdc00, 0xd800, 0xdc01, 0xd808, 0xdf45, 0xdbff, 0xdfff},
+               []int{0xffff, 0x10000, 0x10001, 0x12345, 0x10ffff}},
+       decodeTest{[]uint16{0xd800, 'a'}, []int{0xfffd, 'a'}},
+       decodeTest{[]uint16{0xdfff}, []int{0xfffd}},
+}
+
+func TestDecode(t *testing.T) {
+       for _, tt := range decodeTests {
+               out := Decode(tt.in)
+               if !reflect.DeepEqual(out, tt.out) {
+                       t.Errorf("Decode(%v) = %v; want %v", hex16(tt.in), hex(out), hex(tt.out))
+               }
+       }
+}
+
+type hex []int
+
+func (h hex) Format(f fmt.State, c int) {
+       fmt.Fprint(f, "[")
+       for i, v := range h {
+               if i > 0 {
+                       fmt.Fprint(f, " ")
+               }
+               fmt.Fprintf(f, "%x", v)
+       }
+       fmt.Fprint(f, "]")
+}
+
+type hex16 []uint16
+
+func (h hex16) Format(f fmt.State, c int) {
+       fmt.Fprint(f, "[")
+       for i, v := range h {
+               if i > 0 {
+                       fmt.Fprint(f, " ")
+               }
+               fmt.Fprintf(f, "%x", v)
+       }
+       fmt.Fprint(f, "]")
+}