}
// appendRune inserts a rune at the end of the buffer. It is used for Hangul.
-func (rb *reorderBuffer) appendRune(rune uint32) {
+func (rb *reorderBuffer) appendRune(r uint32) {
bn := rb.nbyte
- sz := utf8.EncodeRune(rb.byte[bn:], int(rune))
+ sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
rb.nbyte += utf8.UTFMax
rb.rune[rb.nrune] = runeInfo{bn, uint8(sz), 0, 0}
rb.nrune++
}
// assignRune sets a rune at position pos. It is used for Hangul and recomposition.
-func (rb *reorderBuffer) assignRune(pos int, rune uint32) {
+func (rb *reorderBuffer) assignRune(pos int, r uint32) {
bn := rb.rune[pos].pos
- sz := utf8.EncodeRune(rb.byte[bn:], int(rune))
+ sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
rb.rune[pos] = runeInfo{bn, uint8(sz), 0, 0}
}
// runeAt returns the rune at position n. It is used for Hangul and recomposition.
func (rb *reorderBuffer) runeAt(n int) uint32 {
inf := rb.rune[n]
- rune, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size])
- return uint32(rune)
+ r, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size])
+ return uint32(r)
}
// bytesAt returns the UTF-8 encoding of the rune at position n.
// decomposeHangul algorithmically decomposes a Hangul rune into
// its Jamo components.
// See http://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
-func (rb *reorderBuffer) decomposeHangul(rune uint32) bool {
+func (rb *reorderBuffer) decomposeHangul(r uint32) bool {
b := rb.rune[:]
n := rb.nrune
if n+3 > len(b) {
return false
}
- rune -= hangulBase
- x := rune % jamoTCount
- rune /= jamoTCount
- rb.appendRune(jamoLBase + rune/jamoVCount)
- rb.appendRune(jamoVBase + rune%jamoVCount)
+ r -= hangulBase
+ x := r % jamoTCount
+ r /= jamoTCount
+ rb.appendRune(jamoLBase + r/jamoVCount)
+ rb.appendRune(jamoVBase + r%jamoVCount)
if x != 0 {
rb.appendRune(jamoTBase + x)
}
// TestCase is used for most tests.
type TestCase struct {
- in []int
- out []int
+ in []rune
+ out []rune
}
-type insertFunc func(rb *reorderBuffer, rune int) bool
+type insertFunc func(rb *reorderBuffer, r rune) bool
-func insert(rb *reorderBuffer, rune int) bool {
- src := inputString(string(rune))
+func insert(rb *reorderBuffer, r rune) bool {
+ src := inputString(string(r))
return rb.insert(src, 0, rb.f.info(src, 0))
}
continue
}
for j, want := range test.out {
- found := int(rb.runeAt(j))
+ found := rune(rb.runeAt(j))
if found != want {
t.Errorf("%s:%d: runeAt(%d) = %U; want %U", name, i, j, found, want)
}
t.Errorf("wrote bytes on flush of empty buffer. (len(out) = %d)", len(out))
}
- for _, r := range []int("world!") {
+ for _, r := range []rune("world!") {
insert(&rb, r)
}
}
var insertTests = []TestCase{
- {[]int{'a'}, []int{'a'}},
- {[]int{0x300}, []int{0x300}},
- {[]int{0x300, 0x316}, []int{0x316, 0x300}}, // CCC(0x300)==230; CCC(0x316)==220
- {[]int{0x316, 0x300}, []int{0x316, 0x300}},
- {[]int{0x41, 0x316, 0x300}, []int{0x41, 0x316, 0x300}},
- {[]int{0x41, 0x300, 0x316}, []int{0x41, 0x316, 0x300}},
- {[]int{0x300, 0x316, 0x41}, []int{0x316, 0x300, 0x41}},
- {[]int{0x41, 0x300, 0x40, 0x316}, []int{0x41, 0x300, 0x40, 0x316}},
+ {[]rune{'a'}, []rune{'a'}},
+ {[]rune{0x300}, []rune{0x300}},
+ {[]rune{0x300, 0x316}, []rune{0x316, 0x300}}, // CCC(0x300)==230; CCC(0x316)==220
+ {[]rune{0x316, 0x300}, []rune{0x316, 0x300}},
+ {[]rune{0x41, 0x316, 0x300}, []rune{0x41, 0x316, 0x300}},
+ {[]rune{0x41, 0x300, 0x316}, []rune{0x41, 0x316, 0x300}},
+ {[]rune{0x300, 0x316, 0x41}, []rune{0x316, 0x300, 0x41}},
+ {[]rune{0x41, 0x300, 0x40, 0x316}, []rune{0x41, 0x300, 0x40, 0x316}},
}
func TestInsert(t *testing.T) {
}
var decompositionNFDTest = []TestCase{
- {[]int{0xC0}, []int{0x41, 0x300}},
- {[]int{0xAC00}, []int{0x1100, 0x1161}},
- {[]int{0x01C4}, []int{0x01C4}},
- {[]int{0x320E}, []int{0x320E}},
- {[]int("음ẻ과"), []int{0x110B, 0x1173, 0x11B7, 0x65, 0x309, 0x1100, 0x116A}},
+ {[]rune{0xC0}, []rune{0x41, 0x300}},
+ {[]rune{0xAC00}, []rune{0x1100, 0x1161}},
+ {[]rune{0x01C4}, []rune{0x01C4}},
+ {[]rune{0x320E}, []rune{0x320E}},
+ {[]rune("음ẻ과"), []rune{0x110B, 0x1173, 0x11B7, 0x65, 0x309, 0x1100, 0x116A}},
}
var decompositionNFKDTest = []TestCase{
- {[]int{0xC0}, []int{0x41, 0x300}},
- {[]int{0xAC00}, []int{0x1100, 0x1161}},
- {[]int{0x01C4}, []int{0x44, 0x5A, 0x030C}},
- {[]int{0x320E}, []int{0x28, 0x1100, 0x1161, 0x29}},
+ {[]rune{0xC0}, []rune{0x41, 0x300}},
+ {[]rune{0xAC00}, []rune{0x1100, 0x1161}},
+ {[]rune{0x01C4}, []rune{0x44, 0x5A, 0x030C}},
+ {[]rune{0x320E}, []rune{0x28, 0x1100, 0x1161, 0x29}},
}
func TestDecomposition(t *testing.T) {
}
var compositionTest = []TestCase{
- {[]int{0x41, 0x300}, []int{0xC0}},
- {[]int{0x41, 0x316}, []int{0x41, 0x316}},
- {[]int{0x41, 0x300, 0x35D}, []int{0xC0, 0x35D}},
- {[]int{0x41, 0x316, 0x300}, []int{0xC0, 0x316}},
+ {[]rune{0x41, 0x300}, []rune{0xC0}},
+ {[]rune{0x41, 0x316}, []rune{0x41, 0x316}},
+ {[]rune{0x41, 0x300, 0x35D}, []rune{0xC0, 0x35D}},
+ {[]rune{0x41, 0x316, 0x300}, []rune{0xC0, 0x316}},
// blocking starter
- {[]int{0x41, 0x316, 0x40, 0x300}, []int{0x41, 0x316, 0x40, 0x300}},
- {[]int{0x1100, 0x1161}, []int{0xAC00}},
+ {[]rune{0x41, 0x316, 0x40, 0x300}, []rune{0x41, 0x316, 0x40, 0x300}},
+ {[]rune{0x1100, 0x1161}, []rune{0xAC00}},
// parenthesized Hangul, alternate between ASCII and Hangul.
- {[]int{0x28, 0x1100, 0x1161, 0x29}, []int{0x28, 0xAC00, 0x29}},
+ {[]rune{0x28, 0x1100, 0x1161, 0x29}, []rune{0x28, 0xAC00, 0x29}},
}
func TestComposition(t *testing.T) {
// This contains only the properties we're interested in.
type Char struct {
name string
- codePoint int // if zero, this index is not a valid code point.
+ codePoint rune // if zero, this index is not a valid code point.
ccc uint8 // canonical combining class
excludeInComp bool // from CompositionExclusions.txt
compatDecomp bool // it has a compatibility expansion
SMissing
)
-var lastChar int = 0
+var lastChar = rune('\u0000')
func (c Char) isValid() bool {
return c.codePoint != 0 && c.state != SMissing
return buf.String()
}
-type Decomposition []int
+type Decomposition []rune
func (d Decomposition) String() string {
return fmt.Sprintf("%.4X", d)
return
}
-func parseDecomposition(s string, skipfirst bool) (a []int, e os.Error) {
+func parseDecomposition(s string, skipfirst bool) (a []rune, e os.Error) {
decomp := strings.Split(s, " ")
if len(decomp) > 0 && skipfirst {
decomp = decomp[1:]
if err != nil {
return a, err
}
- a = append(a, int(point))
+ a = append(a, rune(point))
}
return a, nil
}
state = SLast
}
firstChar := lastChar + 1
- lastChar = int(point)
+ lastChar = rune(point)
if state != SLast {
firstChar = lastChar
}
// hasCompatDecomp returns true if any of the recursive
// decompositions contains a compatibility expansion.
// In this case, the character may not occur in NFK*.
-func hasCompatDecomp(rune int) bool {
- c := &chars[rune]
+func hasCompatDecomp(r rune) bool {
+ c := &chars[r]
if c.compatDecomp {
return true
}
JamoTEnd = 0x11C3
)
-func isHangul(rune int) bool {
- return HangulBase <= rune && rune < HangulEnd
+func isHangul(r rune) bool {
+ return HangulBase <= r && r < HangulEnd
}
-func ccc(rune int) uint8 {
- return chars[rune].ccc
+func ccc(r rune) uint8 {
+ return chars[r].ccc
}
// Insert a rune in a buffer, ordered by Canonical Combining Class.
-func insertOrdered(b Decomposition, rune int) Decomposition {
+func insertOrdered(b Decomposition, r rune) Decomposition {
n := len(b)
b = append(b, 0)
- cc := ccc(rune)
+ cc := ccc(r)
if cc > 0 {
// Use bubble sort.
for ; n > 0; n-- {
b[n] = b[n-1]
}
}
- b[n] = rune
+ b[n] = r
return b
}
// Recursively decompose.
-func decomposeRecursive(form int, rune int, d Decomposition) Decomposition {
- if isHangul(rune) {
+func decomposeRecursive(form int, r rune, d Decomposition) Decomposition {
+ if isHangul(r) {
return d
}
- dcomp := chars[rune].forms[form].decomp
+ dcomp := chars[r].forms[form].decomp
if len(dcomp) == 0 {
- return insertOrdered(d, rune)
+ return insertOrdered(d, r)
}
for _, c := range dcomp {
d = decomposeRecursive(form, c, d)
f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint)
}
- for _, rune := range f.decomp {
- chars[rune].forms[form].inDecomp = true
+ for _, r := range f.decomp {
+ chars[r].forms[form].inDecomp = true
}
}
switch {
case len(f.decomp) > 0:
f.quickCheck[MDecomposed] = QCNo
- case isHangul(i):
+ case isHangul(rune(i)):
f.quickCheck[MDecomposed] = QCNo
default:
f.quickCheck[MDecomposed] = QCYes
for i, char := range chars {
v := makeCharInfo(char)
if v != 0 {
- t.insert(i, v)
+ t.insert(rune(i), v)
}
}
return t.printTables("charInfo")
for _, c := range chars {
for f := 0; f < 2; f++ {
d := c.forms[f].expandedDecomp
- s := string([]int(d))
+ s := string([]rune(d))
if _, ok := positionMap[s]; !ok {
p := decompositions.Len()
decompositions.WriteByte(uint8(len(s)))
for i, c := range chars {
d := c.forms[FCanonical].expandedDecomp
if len(d) != 0 {
- nfcT.insert(i, positionMap[string([]int(d))])
+ nfcT.insert(rune(i), positionMap[string([]rune(d))])
if ccc(c.codePoint) != ccc(d[0]) {
// We assume the lead ccc of a decomposition is !=0 in this case.
if ccc(d[0]) == 0 {
}
d = c.forms[FCompatibility].expandedDecomp
if len(d) != 0 {
- nfkcT.insert(i, positionMap[string([]int(d))])
+ nfkcT.insert(rune(i), positionMap[string([]rune(d))])
if ccc(c.codePoint) != ccc(d[0]) {
// We assume the lead ccc of a decomposition is !=0 in this case.
if ccc(d[0]) == 0 {
for i, c := range chars {
for _, f := range c.forms {
isNo := (f.quickCheck[MDecomposed] == QCNo)
- if (len(f.decomp) > 0) != isNo && !isHangul(i) {
+ if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) {
log.Fatalf("%U: NF*D must be no if rune decomposes", i)
}
// We take the smallest, largest and an arbitrary value for each
// of the UTF-8 sequence lengths.
-var testRunes = []int{
+var testRunes = []rune{
0x01, 0x0C, 0x7F, // 1-byte sequences
0x80, 0x100, 0x7FF, // 2-byte sequences
0x800, 0x999, 0xFFFF, // 3-byte sequences
if pos != test.pos {
t.Errorf("%s:%d: position is %d; want %d", name, i, pos, test.pos)
}
- runes := []int(test.buffer)
+ runes := []rune(test.buffer)
if rb.nrune != len(runes) {
t.Errorf("%s:%d: reorder buffer lenght is %d; want %d", name, i, rb.nrune, len(runes))
continue
}
for j, want := range runes {
- found := int(rb.runeAt(j))
+ found := rune(rb.runeAt(j))
if found != want {
t.Errorf("%s:%d: rune at %d is %U; want %U", name, i, j, found, want)
}
}
if outs != test.out {
// Find first rune that differs and show context.
- ir := []int(outs)
- ig := []int(test.out)
+ ir := []rune(outs)
+ ig := []rune(test.out)
for j := 0; j < len(ir) && j < len(ig); j++ {
if ir[j] == ig[j] {
continue
name string
partnr int
number int
- rune int // used for character by character test
+ r rune // used for character by character test
cols [cMaxColumns]string // Each has 5 entries, see below.
}
if err != nil {
logger.Fatal(err)
}
- if test.rune == 0 {
+ if test.r == 0 {
// save for CharacterByCharacterTests
- test.rune = int(r)
+ test.r = int(r)
}
var buf [utf8.UTFMax]byte
- sz := utf8.EncodeRune(buf[:], int(r))
+ sz := utf8.EncodeRune(buf[:], rune(r))
test.cols[j-1] += string(buf[:sz])
}
}
if errorCount > 20 {
return
}
- st, sr, sg := []int(test), []int(result), []int(gold)
+ st, sr, sg := []rune(test), []rune(result), []rune(gold)
logger.Printf("%s:%s: %s(%X)=%X; want:%X: %s",
t.Name(), name, fstr[f], st, sr, sg, t.name)
}
if errorCount > 20 {
return
}
- logger.Printf("%s:%s: %s(%X)=%v; want: %v", t.Name(), name, fstr[f], []int(test), result, want)
+ logger.Printf("%s:%s: %s(%X)=%v; want: %v", t.Name(), name, fstr[f], []rune(test), result, want)
}
}
tests := part[1].tests
last := 0
for i := 0; i <= len(tests); i++ { // last one is special case
- var rune int
+ var r int
if i == len(tests) {
- rune = 0x2FA1E // Don't have to go to 0x10FFFF
+ r = 0x2FA1E // Don't have to go to 0x10FFFF
} else {
- rune = tests[i].rune
+ r = tests[i].r
}
- for last++; last < rune; last++ {
+ for last++; last < r; last++ {
// Check all characters that were not explicitly listed in the test.
t := &Test{partnr: 1, number: -1}
char := string(last)
{1, []byte{t6, tx, tx, tx, tx, tx}},
}
-func mkUtf8(rune int) ([]byte, int) {
+func mkUTF8(r rune) ([]byte, int) {
var b [utf8.UTFMax]byte
- sz := utf8.EncodeRune(b[:], rune)
+ sz := utf8.EncodeRune(b[:], r)
return b[:sz], sz
}
func TestLookup(t *testing.T) {
for i, tt := range testRunes {
- b, szg := mkUtf8(tt)
+ b, szg := mkUTF8(tt)
v, szt := testdata.lookup(b)
if int(v) != i {
t.Errorf("lookup(%U): found value %#x, expected %#x", tt, v, i)
func TestLookupUnsafe(t *testing.T) {
for i, tt := range testRunes {
- b, _ := mkUtf8(tt)
+ b, _ := mkUTF8(tt)
v := testdata.lookupUnsafe(b)
if int(v) != i {
t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i)
func TestLookupString(t *testing.T) {
for i, tt := range testRunes {
- b, szg := mkUtf8(tt)
+ b, szg := mkUTF8(tt)
v, szt := testdata.lookupString(string(b))
if int(v) != i {
t.Errorf("lookup(%U): found value %#x, expected %#x", i, v, i)
func TestLookupStringUnsafe(t *testing.T) {
for i, tt := range testRunes {
- b, _ := mkUtf8(tt)
+ b, _ := mkUTF8(tt)
v := testdata.lookupStringUnsafe(string(b))
if int(v) != i {
t.Errorf("lookupUnsafe(%U): found value %#x, expected %#x", i, v, i)
package norm
-var testRunes = []int{1, 12, 127, 128, 256, 2047, 2048, 2457, 65535, 65536, 65793, 1114111, 512, 513, 514, 528, 533}
+var testRunes = []rune{1, 12, 127, 128, 256, 2047, 2048, 2457, 65535, 65536, 65793, 1114111, 512, 513, 514, 528, 533}
// testdataValues: 192 entries, 384 bytes
// Block 2 is the null block.
return count
}
-func (n *trieNode) insert(rune int, value uint16) {
+func (n *trieNode) insert(r rune, value uint16) {
var p [utf8.UTFMax]byte
- sz := utf8.EncodeRune(p[:], rune)
+ sz := utf8.EncodeRune(p[:], r)
for i := 0; i < sz; i++ {
if n.leaf {