Cypherpunks repositories - gostls13.git/commitdiff
exp/locale/collate: fixed two bugs uncovered by regression tests.
author Marcel van Lohuizen <mpvl@golang.org>
Wed, 2 May 2012 15:01:41 +0000 (17:01 +0200)
committer Marcel van Lohuizen <mpvl@golang.org>
Wed, 2 May 2012 15:01:41 +0000 (17:01 +0200)
The first bug was that tertiary ignorables had the same colElem as
implicit colElems, yielding unexpected results. The current encoding
ensures that a non-implicit colElem is never 0.  This fix uncovered
another bug of the trie that indexed incorrectly into the null block.
This was caused by an unfinished optimization that would avoid the
need to max out the most-significant bits of continuation bytes.
This bug was also present in the trie used in exp/norm and has been
fixed there as well. The appearance of the bug was rare, as the lower
blocks happened to be nearly nil.

R=r
CC=golang-dev
https://golang.org/cl/6127070

13 files changed:
src/pkg/exp/locale/collate/build/colelem.go
src/pkg/exp/locale/collate/build/colelem_test.go
src/pkg/exp/locale/collate/build/trie.go
src/pkg/exp/locale/collate/build/trie_test.go
src/pkg/exp/locale/collate/colelem.go
src/pkg/exp/locale/collate/colelem_test.go
src/pkg/exp/locale/collate/trie.go
src/pkg/exp/locale/collate/trie_test.go
src/pkg/exp/norm/tables.go
src/pkg/exp/norm/trie.go
src/pkg/exp/norm/trie_test.go
src/pkg/exp/norm/triedata_test.go
src/pkg/exp/norm/triegen.go

index 09425320fdabda95c56d0a998e72367a033f2a2d..3d5e27c67d79d7c3d09c8d4cfb2e57711e55fd63 100644 (file)
@@ -25,11 +25,11 @@ const (
 // For normal collation elements, we assume that a collation element either has
 // a primary or non-default secondary value, not both.
 // Collation elements with a primary value are of the form
-// 010ppppp pppppppp pppppppp tttttttt, where
+// 000ppppp pppppppp pppppppp tttttttt, where
 //   - p* is primary collation value
 //   - t* is the tertiary collation value
 // Collation elements with a secondary value are of the form
-// 00000000 ssssssss ssssssss tttttttt, where
+// 01000000 ssssssss ssssssss tttttttt, where
 //   - s* is the secondary collation value
 //   - t* is the tertiary collation value
 const (
@@ -37,7 +37,7 @@ const (
        maxSecondaryBits = 16
        maxTertiaryBits  = 8
 
-       isPrimary = 0x40000000
+       isSecondary = 0x40000000
 )
 
 func makeCE(weights []int) (uint32, error) {
@@ -57,10 +57,10 @@ func makeCE(weights []int) (uint32, error) {
                        return 0, fmt.Errorf("makeCE: non-default secondary weight for non-zero primary: %X", weights)
                }
                ce = uint32(weights[0]<<maxTertiaryBits + weights[2])
-               ce |= isPrimary
        } else {
                // secondary weight form
                ce = uint32(weights[1]<<maxTertiaryBits + weights[2])
+               ce |= isSecondary
        }
        return ce, nil
 }
@@ -162,7 +162,6 @@ const (
 // http://unicode.org/reports/tr10/#Implicit_Weights,
 // but preserve the resulting relative ordering of the runes.
 func implicitPrimary(r rune) int {
-
        if r >= minUnified && r <= maxUnified {
                // The most common case for CJK.
                return int(r) + commonUnifiedOffset
index 841ac116291034df25b12994368c9f374140c2bd..0e4f3f14e8fbd7a7cc78dae8fec2220918b7888a 100644 (file)
@@ -29,9 +29,9 @@ func decompCE(in []int) (ce uint32, err error) {
 }
 
 var ceTests = []ceTest{
-       {normalCE, []int{0, 0, 0}, 000},
-       {normalCE, []int{0, 30, 3}, 0x1E03},
-       {normalCE, []int{100, defaultSecondary, 3}, 0x40006403},
+       {normalCE, []int{0, 0, 0}, 0x40000000},
+       {normalCE, []int{0, 30, 3}, 0x40001E03},
+       {normalCE, []int{100, defaultSecondary, 3}, 0x6403},
        {normalCE, []int{100, 0, 3}, 0xFFFF}, // non-ignorable primary with non-default secondary
        {normalCE, []int{100, 1, 3}, 0xFFFF},
        {normalCE, []int{1 << maxPrimaryBits, defaultSecondary, 0}, 0xFFFF},
index 894d29305e0e6893d9231098603cc1352f6a3662..480cc58d151969560e08c6ce5c52cda6d5895b04 100644 (file)
@@ -19,7 +19,10 @@ import (
        "reflect"
 )
 
-const blockSize = 64
+const (
+       blockSize   = 64
+       blockOffset = 2 // Subtract 2 blocks to compensate for the 0x80 added to continuation bytes.
+)
 
 type trie struct {
        index  []uint16
@@ -102,7 +105,7 @@ func computeOffsets(index *nodeIndex, n *trieNode) int64 {
        if n.isInternal() {
                v, ok := index.lookupBlockIdx[h]
                if !ok {
-                       v = int64(len(index.lookupBlocks))
+                       v = int64(len(index.lookupBlocks)) - blockOffset
                        index.lookupBlocks = append(index.lookupBlocks, n)
                        index.lookupBlockIdx[h] = v
                }
@@ -110,7 +113,7 @@ func computeOffsets(index *nodeIndex, n *trieNode) int64 {
        } else {
                v, ok := index.valueBlockIdx[h]
                if !ok {
-                       v = int64(len(index.valueBlocks))
+                       v = int64(len(index.valueBlocks)) - blockOffset
                        index.valueBlocks = append(index.valueBlocks, n)
                        index.valueBlockIdx[h] = v
                }
index c530bb92f98191a17103ba9219a63ef2fc1c7a44..3ecbd841c5943f548e0a950faac6ce61a140c3ec 100644 (file)
@@ -79,24 +79,24 @@ var testLookup = [640]uint16 {
        // Block 0x1, offset 0x40
        // Block 0x2, offset 0x80
        // Block 0x3, offset 0xc0
-       0x0c2:0x03, 0x0c4:0x04
-       0x0c8:0x05
-       0x0df:0x06
-       0x0e0:0x04
-       0x0ef:0x05
-       0x0f0:0x07, 0x0f4:0x09
+       0x0c2:0x01, 0x0c4:0x02
+       0x0c8:0x03
+       0x0df:0x04
+       0x0e0:0x02
+       0x0ef:0x03
+       0x0f0:0x05, 0x0f4:0x07
        // Block 0x4, offset 0x100
-       0x120:0x07, 0x126:0x08
+       0x120:0x05, 0x126:0x06
        // Block 0x5, offset 0x140
-       0x17f:0x09
+       0x17f:0x07
        // Block 0x6, offset 0x180
-       0x180:0x0a, 0x184:0x0b
+       0x180:0x08, 0x184:0x09
        // Block 0x7, offset 0x1c0
-       0x1d0:0x06
+       0x1d0:0x04
        // Block 0x8, offset 0x200
-       0x23f:0x0c
+       0x23f:0x0a
        // Block 0x9, offset 0x240
-       0x24f:0x08
+       0x24f:0x06
 }
 
 var testTrie = trie{ testLookup[:], testValues[:]}
index 03cfc678e82c306e42fdb7dbdfb17b79e3f0fec0..2cd62017376517527878effe056f48e3f5c0e781 100644 (file)
@@ -68,17 +68,18 @@ func (ce colElem) ctype() ceType {
 // For normal collation elements, we assume that a collation element either has
 // a primary or non-default secondary value, not both.
 // Collation elements with a primary value are of the form
-// 010ppppp pppppppp pppppppp tttttttt, where
+// 000ppppp pppppppp pppppppp tttttttt, where
 //   - p* is primary collation value
 //   - t* is the tertiary collation value
 // Collation elements with a secondary value are of the form
-// 00000000 ssssssss ssssssss tttttttt, where
+// 01000000 ssssssss ssssssss tttttttt, where
 //   - s* is the secondary collation value
 //   - t* is the tertiary collation value
 func splitCE(ce colElem) weights {
+       const secondaryMask = 0x40000000
        w := weights{}
        w.tertiary = uint8(ce)
-       if ce&0x40000000 != 0 {
+       if ce&secondaryMask == 0 {
                // primary weight form
                w.primary = uint32((ce >> 8) & 0x1FFFFF)
                w.secondary = defaultSecondary
index b201f814571e1b11ca468cab0ca24e7760d2c123..dfc6bd951877ae362be5a300cef2fe02c2591153 100644 (file)
@@ -20,14 +20,14 @@ func makeCE(weights []int) colElem {
                maxPrimaryBits   = 21
                maxSecondaryBits = 16
                maxTertiaryBits  = 8
-               isPrimary        = 0x40000000
+               isSecondary      = 0x40000000
        )
        var ce colElem
        if weights[0] != 0 {
                ce = colElem(weights[0]<<maxTertiaryBits + weights[2])
-               ce |= isPrimary
        } else {
                ce = colElem(weights[1]<<maxTertiaryBits + weights[2])
+               ce |= isSecondary
        }
        return ce
 }
index ea0396085dbefd4af1e4495aff22650eeec549dc..00fe91bf030682c216b4ab6afd0a32ef7e10fe81 100644 (file)
@@ -27,15 +27,10 @@ const (
        t5 = 0xF8 // 1111 1000
        t6 = 0xFC // 1111 1100
        te = 0xFE // 1111 1110
-
-       maskx = 0x3F // 0011 1111
-       mask2 = 0x1F // 0001 1111
-       mask3 = 0x0F // 0000 1111
-       mask4 = 0x07 // 0000 0111
 )
 
 func (t *trie) lookupValue(n uint16, b byte) colElem {
-       return colElem(t.values[int(n)<<6+int(b&maskx)])
+       return colElem(t.values[int(n)<<6+int(b)])
 }
 
 // lookup returns the trie value for the first UTF-8 encoding in s and
@@ -67,7 +62,7 @@ func (t *trie) lookup(s []byte) (v colElem, sz int) {
                if c1 < tx || t2 <= c1 {
                        return 0, 1
                }
-               o := int(i)<<6 + int(c1)&maskx
+               o := int(i)<<6 + int(c1)
                i = t.index[o]
                c2 := s[2]
                if c2 < tx || t2 <= c2 {
@@ -83,13 +78,13 @@ func (t *trie) lookup(s []byte) (v colElem, sz int) {
                if c1 < tx || t2 <= c1 {
                        return 0, 1
                }
-               o := int(i)<<6 + int(c1)&maskx
+               o := int(i)<<6 + int(c1)
                i = t.index[o]
                c2 := s[2]
                if c2 < tx || t2 <= c2 {
                        return 0, 2
                }
-               o = int(i)<<6 + int(c2)&maskx
+               o = int(i)<<6 + int(c2)
                i = t.index[o]
                c3 := s[3]
                if c3 < tx || t2 <= c3 {
index b5868cad1489774a59a909a258d5c01254939b81..891a5e3a312f56a803821cadb3c15224c8b9969c 100644 (file)
@@ -89,18 +89,18 @@ var testValues = [832]uint32{
 }
 
 var testLookup = [640]uint16{
-       0x0c2: 0x03, 0x0c4: 0x04,
-       0x0c8: 0x05,
-       0x0df: 0x06,
-       0x0e0: 0x04,
-       0x0ef: 0x05,
-       0x0f0: 0x07, 0x0f4: 0x09,
-       0x120: 0x07, 0x126: 0x08,
-       0x17f: 0x09,
-       0x180: 0x0a, 0x184: 0x0b,
-       0x1d0: 0x06,
-       0x23f: 0x0c,
-       0x24f: 0x08,
+       0x0c2: 0x01, 0x0c4: 0x02,
+       0x0c8: 0x03,
+       0x0df: 0x04,
+       0x0e0: 0x02,
+       0x0ef: 0x03,
+       0x0f0: 0x05, 0x0f4: 0x07,
+       0x120: 0x05, 0x126: 0x06,
+       0x17f: 0x07,
+       0x180: 0x08, 0x184: 0x09,
+       0x1d0: 0x04,
+       0x23f: 0x0a,
+       0x24f: 0x06,
 }
 
 var testTrie = trie{testLookup[:], testValues[:]}
index db7e64ee656d38ba1e51cd67b7dcb818074eca5b..231d85d898fc7bf4ecf3d8210a2820a9d065076b 100644 (file)
@@ -3746,75 +3746,75 @@ var nfcLookup = [1088]uint8{
        // Block 0x1, offset 0x40
        // Block 0x2, offset 0x80
        // Block 0x3, offset 0xc0
-       0x0c2: 0x2e, 0x0c3: 0x03, 0x0c4: 0x04, 0x0c5: 0x05, 0x0c6: 0x2f, 0x0c7: 0x06,
-       0x0c8: 0x07, 0x0ca: 0x30, 0x0cc: 0x08, 0x0cd: 0x09, 0x0ce: 0x0a, 0x0cf: 0x31,
-       0x0d0: 0x0b, 0x0d1: 0x32, 0x0d2: 0x33, 0x0d3: 0x0c, 0x0d6: 0x0d, 0x0d7: 0x34,
-       0x0d8: 0x35, 0x0d9: 0x0e, 0x0db: 0x36, 0x0dc: 0x37, 0x0dd: 0x38, 0x0df: 0x39,
-       0x0e0: 0x04, 0x0e1: 0x05, 0x0e2: 0x06, 0x0e3: 0x07,
-       0x0ea: 0x08, 0x0eb: 0x09, 0x0ec: 0x09, 0x0ed: 0x0a, 0x0ef: 0x0b,
-       0x0f0: 0x10,
+       0x0c2: 0x2c, 0x0c3: 0x01, 0x0c4: 0x02, 0x0c5: 0x03, 0x0c6: 0x2d, 0x0c7: 0x04,
+       0x0c8: 0x05, 0x0ca: 0x2e, 0x0cc: 0x06, 0x0cd: 0x07, 0x0ce: 0x08, 0x0cf: 0x2f,
+       0x0d0: 0x09, 0x0d1: 0x30, 0x0d2: 0x31, 0x0d3: 0x0a, 0x0d6: 0x0b, 0x0d7: 0x32,
+       0x0d8: 0x33, 0x0d9: 0x0c, 0x0db: 0x34, 0x0dc: 0x35, 0x0dd: 0x36, 0x0df: 0x37,
+       0x0e0: 0x02, 0x0e1: 0x03, 0x0e2: 0x04, 0x0e3: 0x05,
+       0x0ea: 0x06, 0x0eb: 0x07, 0x0ec: 0x07, 0x0ed: 0x08, 0x0ef: 0x09,
+       0x0f0: 0x0e,
        // Block 0x4, offset 0x100
-       0x120: 0x3a, 0x121: 0x3b, 0x124: 0x3c, 0x125: 0x3d, 0x126: 0x3e, 0x127: 0x3f,
-       0x128: 0x40, 0x129: 0x41, 0x12a: 0x42, 0x12b: 0x43, 0x12c: 0x3e, 0x12d: 0x44, 0x12e: 0x45, 0x12f: 0x46,
-       0x131: 0x47, 0x132: 0x48, 0x133: 0x49, 0x134: 0x4a, 0x135: 0x4b, 0x137: 0x4c,
-       0x138: 0x4d, 0x139: 0x4e, 0x13a: 0x4f, 0x13b: 0x50, 0x13c: 0x51, 0x13d: 0x52, 0x13e: 0x53, 0x13f: 0x54,
+       0x120: 0x38, 0x121: 0x39, 0x124: 0x3a, 0x125: 0x3b, 0x126: 0x3c, 0x127: 0x3d,
+       0x128: 0x3e, 0x129: 0x3f, 0x12a: 0x40, 0x12b: 0x41, 0x12c: 0x3c, 0x12d: 0x42, 0x12e: 0x43, 0x12f: 0x44,
+       0x131: 0x45, 0x132: 0x46, 0x133: 0x47, 0x134: 0x48, 0x135: 0x49, 0x137: 0x4a,
+       0x138: 0x4b, 0x139: 0x4c, 0x13a: 0x4d, 0x13b: 0x4e, 0x13c: 0x4f, 0x13d: 0x50, 0x13e: 0x51, 0x13f: 0x52,
        // Block 0x5, offset 0x140
-       0x140: 0x55, 0x142: 0x56, 0x144: 0x57, 0x145: 0x58, 0x146: 0x59, 0x147: 0x5a,
-       0x14d: 0x5b,
-       0x15c: 0x5c, 0x15f: 0x5d,
-       0x162: 0x5e, 0x164: 0x5f,
-       0x168: 0x60, 0x169: 0x61, 0x16c: 0x0f, 0x16d: 0x62, 0x16e: 0x63, 0x16f: 0x64,
-       0x170: 0x65, 0x173: 0x66, 0x177: 0x67,
-       0x178: 0x10, 0x179: 0x11, 0x17a: 0x12, 0x17b: 0x13, 0x17c: 0x14, 0x17d: 0x15, 0x17e: 0x16, 0x17f: 0x17,
+       0x140: 0x53, 0x142: 0x54, 0x144: 0x55, 0x145: 0x56, 0x146: 0x57, 0x147: 0x58,
+       0x14d: 0x59,
+       0x15c: 0x5a, 0x15f: 0x5b,
+       0x162: 0x5c, 0x164: 0x5d,
+       0x168: 0x5e, 0x169: 0x5f, 0x16c: 0x0d, 0x16d: 0x60, 0x16e: 0x61, 0x16f: 0x62,
+       0x170: 0x63, 0x173: 0x64, 0x177: 0x65,
+       0x178: 0x0e, 0x179: 0x0f, 0x17a: 0x10, 0x17b: 0x11, 0x17c: 0x12, 0x17d: 0x13, 0x17e: 0x14, 0x17f: 0x15,
        // Block 0x6, offset 0x180
-       0x180: 0x68, 0x183: 0x69, 0x184: 0x6a, 0x186: 0x6b, 0x187: 0x6c,
-       0x188: 0x6d, 0x189: 0x18, 0x18a: 0x19, 0x18b: 0x6e, 0x18c: 0x6f,
-       0x1ab: 0x70,
-       0x1b3: 0x71, 0x1b5: 0x72, 0x1b7: 0x73,
+       0x180: 0x66, 0x183: 0x67, 0x184: 0x68, 0x186: 0x69, 0x187: 0x6a,
+       0x188: 0x6b, 0x189: 0x16, 0x18a: 0x17, 0x18b: 0x6c, 0x18c: 0x6d,
+       0x1ab: 0x6e,
+       0x1b3: 0x6f, 0x1b5: 0x70, 0x1b7: 0x71,
        // Block 0x7, offset 0x1c0
-       0x1c0: 0x74, 0x1c1: 0x1a, 0x1c2: 0x1b, 0x1c3: 0x1c,
+       0x1c0: 0x72, 0x1c1: 0x18, 0x1c2: 0x19, 0x1c3: 0x1a,
        // Block 0x8, offset 0x200
-       0x219: 0x75, 0x21b: 0x76,
-       0x220: 0x77, 0x223: 0x78, 0x224: 0x79, 0x225: 0x7a, 0x226: 0x7b, 0x227: 0x7c,
-       0x22a: 0x7d, 0x22b: 0x7e, 0x22f: 0x7f,
-       0x230: 0x80, 0x231: 0x80, 0x232: 0x80, 0x233: 0x80, 0x234: 0x80, 0x235: 0x80, 0x236: 0x80, 0x237: 0x80,
-       0x238: 0x80, 0x239: 0x80, 0x23a: 0x80, 0x23b: 0x80, 0x23c: 0x80, 0x23d: 0x80, 0x23e: 0x80, 0x23f: 0x80,
+       0x219: 0x73, 0x21b: 0x74,
+       0x220: 0x75, 0x223: 0x76, 0x224: 0x77, 0x225: 0x78, 0x226: 0x79, 0x227: 0x7a,
+       0x22a: 0x7b, 0x22b: 0x7c, 0x22f: 0x7d,
+       0x230: 0x7e, 0x231: 0x7e, 0x232: 0x7e, 0x233: 0x7e, 0x234: 0x7e, 0x235: 0x7e, 0x236: 0x7e, 0x237: 0x7e,
+       0x238: 0x7e, 0x239: 0x7e, 0x23a: 0x7e, 0x23b: 0x7e, 0x23c: 0x7e, 0x23d: 0x7e, 0x23e: 0x7e, 0x23f: 0x7e,
        // Block 0x9, offset 0x240
-       0x240: 0x80, 0x241: 0x80, 0x242: 0x80, 0x243: 0x80, 0x244: 0x80, 0x245: 0x80, 0x246: 0x80, 0x247: 0x80,
-       0x248: 0x80, 0x249: 0x80, 0x24a: 0x80, 0x24b: 0x80, 0x24c: 0x80, 0x24d: 0x80, 0x24e: 0x80, 0x24f: 0x80,
-       0x250: 0x80, 0x251: 0x80, 0x252: 0x80, 0x253: 0x80, 0x254: 0x80, 0x255: 0x80, 0x256: 0x80, 0x257: 0x80,
-       0x258: 0x80, 0x259: 0x80, 0x25a: 0x80, 0x25b: 0x80, 0x25c: 0x80, 0x25d: 0x80, 0x25e: 0x80, 0x25f: 0x80,
-       0x260: 0x80, 0x261: 0x80, 0x262: 0x80, 0x263: 0x80, 0x264: 0x80, 0x265: 0x80, 0x266: 0x80, 0x267: 0x80,
-       0x268: 0x80, 0x269: 0x80, 0x26a: 0x80, 0x26b: 0x80, 0x26c: 0x80, 0x26d: 0x80, 0x26e: 0x80, 0x26f: 0x80,
-       0x270: 0x80, 0x271: 0x80, 0x272: 0x80, 0x273: 0x80, 0x274: 0x80, 0x275: 0x80, 0x276: 0x80, 0x277: 0x80,
-       0x278: 0x80, 0x279: 0x80, 0x27a: 0x80, 0x27b: 0x80, 0x27c: 0x80, 0x27d: 0x80, 0x27e: 0x80, 0x27f: 0x80,
+       0x240: 0x7e, 0x241: 0x7e, 0x242: 0x7e, 0x243: 0x7e, 0x244: 0x7e, 0x245: 0x7e, 0x246: 0x7e, 0x247: 0x7e,
+       0x248: 0x7e, 0x249: 0x7e, 0x24a: 0x7e, 0x24b: 0x7e, 0x24c: 0x7e, 0x24d: 0x7e, 0x24e: 0x7e, 0x24f: 0x7e,
+       0x250: 0x7e, 0x251: 0x7e, 0x252: 0x7e, 0x253: 0x7e, 0x254: 0x7e, 0x255: 0x7e, 0x256: 0x7e, 0x257: 0x7e,
+       0x258: 0x7e, 0x259: 0x7e, 0x25a: 0x7e, 0x25b: 0x7e, 0x25c: 0x7e, 0x25d: 0x7e, 0x25e: 0x7e, 0x25f: 0x7e,
+       0x260: 0x7e, 0x261: 0x7e, 0x262: 0x7e, 0x263: 0x7e, 0x264: 0x7e, 0x265: 0x7e, 0x266: 0x7e, 0x267: 0x7e,
+       0x268: 0x7e, 0x269: 0x7e, 0x26a: 0x7e, 0x26b: 0x7e, 0x26c: 0x7e, 0x26d: 0x7e, 0x26e: 0x7e, 0x26f: 0x7e,
+       0x270: 0x7e, 0x271: 0x7e, 0x272: 0x7e, 0x273: 0x7e, 0x274: 0x7e, 0x275: 0x7e, 0x276: 0x7e, 0x277: 0x7e,
+       0x278: 0x7e, 0x279: 0x7e, 0x27a: 0x7e, 0x27b: 0x7e, 0x27c: 0x7e, 0x27d: 0x7e, 0x27e: 0x7e, 0x27f: 0x7e,
        // Block 0xa, offset 0x280
-       0x280: 0x80, 0x281: 0x80, 0x282: 0x80, 0x283: 0x80, 0x284: 0x80, 0x285: 0x80, 0x286: 0x80, 0x287: 0x80,
-       0x288: 0x80, 0x289: 0x80, 0x28a: 0x80, 0x28b: 0x80, 0x28c: 0x80, 0x28d: 0x80, 0x28e: 0x80, 0x28f: 0x80,
-       0x290: 0x80, 0x291: 0x80, 0x292: 0x80, 0x293: 0x80, 0x294: 0x80, 0x295: 0x80, 0x296: 0x80, 0x297: 0x80,
-       0x298: 0x80, 0x299: 0x80, 0x29a: 0x80, 0x29b: 0x80, 0x29c: 0x80, 0x29d: 0x80, 0x29e: 0x81,
+       0x280: 0x7e, 0x281: 0x7e, 0x282: 0x7e, 0x283: 0x7e, 0x284: 0x7e, 0x285: 0x7e, 0x286: 0x7e, 0x287: 0x7e,
+       0x288: 0x7e, 0x289: 0x7e, 0x28a: 0x7e, 0x28b: 0x7e, 0x28c: 0x7e, 0x28d: 0x7e, 0x28e: 0x7e, 0x28f: 0x7e,
+       0x290: 0x7e, 0x291: 0x7e, 0x292: 0x7e, 0x293: 0x7e, 0x294: 0x7e, 0x295: 0x7e, 0x296: 0x7e, 0x297: 0x7e,
+       0x298: 0x7e, 0x299: 0x7e, 0x29a: 0x7e, 0x29b: 0x7e, 0x29c: 0x7e, 0x29d: 0x7e, 0x29e: 0x7f,
        // Block 0xb, offset 0x2c0
-       0x2e4: 0x1d, 0x2e5: 0x1e, 0x2e6: 0x1f, 0x2e7: 0x20,
-       0x2e8: 0x21, 0x2e9: 0x22, 0x2ea: 0x23, 0x2eb: 0x24, 0x2ec: 0x82, 0x2ed: 0x83,
-       0x2f8: 0x84,
+       0x2e4: 0x1b, 0x2e5: 0x1c, 0x2e6: 0x1d, 0x2e7: 0x1e,
+       0x2e8: 0x1f, 0x2e9: 0x20, 0x2ea: 0x21, 0x2eb: 0x22, 0x2ec: 0x80, 0x2ed: 0x81,
+       0x2f8: 0x82,
        // Block 0xc, offset 0x300
-       0x307: 0x85,
-       0x328: 0x86,
+       0x307: 0x83,
+       0x328: 0x84,
        // Block 0xd, offset 0x340
-       0x341: 0x77, 0x342: 0x87,
+       0x341: 0x75, 0x342: 0x85,
        // Block 0xe, offset 0x380
-       0x385: 0x88, 0x386: 0x89, 0x387: 0x8a,
-       0x389: 0x8b,
+       0x385: 0x86, 0x386: 0x87, 0x387: 0x88,
+       0x389: 0x89,
        // Block 0xf, offset 0x3c0
-       0x3e0: 0x25, 0x3e1: 0x26, 0x3e2: 0x27, 0x3e3: 0x28, 0x3e4: 0x29, 0x3e5: 0x2a, 0x3e6: 0x2b, 0x3e7: 0x2c,
-       0x3e8: 0x2d,
+       0x3e0: 0x23, 0x3e1: 0x24, 0x3e2: 0x25, 0x3e3: 0x26, 0x3e4: 0x27, 0x3e5: 0x28, 0x3e6: 0x29, 0x3e7: 0x2a,
+       0x3e8: 0x2b,
        // Block 0x10, offset 0x400
-       0x410: 0x0c, 0x411: 0x0d,
-       0x41d: 0x0e,
-       0x42f: 0x0f,
+       0x410: 0x0a, 0x411: 0x0b,
+       0x41d: 0x0c,
+       0x42f: 0x0d,
 }
 
-var nfcTrie = trie{nfcLookup[:], nfcValues[:], nfcSparseValues[:], nfcSparseOffset[:], 46}
+var nfcTrie = trie{nfcLookup[:], nfcValues[:], nfcSparseValues[:], nfcSparseOffset[:], 44}
 
 // nfkcValues: 5568 entries, 11136 bytes
 // Block 2 is the null block.
@@ -5642,84 +5642,84 @@ var nfkcLookup = [1152]uint8{
        // Block 0x1, offset 0x40
        // Block 0x2, offset 0x80
        // Block 0x3, offset 0xc0
-       0x0c2: 0x57, 0x0c3: 0x03, 0x0c4: 0x04, 0x0c5: 0x05, 0x0c6: 0x58, 0x0c7: 0x06,
-       0x0c8: 0x07, 0x0ca: 0x59, 0x0cb: 0x5a, 0x0cc: 0x08, 0x0cd: 0x09, 0x0ce: 0x0a, 0x0cf: 0x0b,
-       0x0d0: 0x0c, 0x0d1: 0x5b, 0x0d2: 0x5c, 0x0d3: 0x0d, 0x0d6: 0x0e, 0x0d7: 0x5d,
-       0x0d8: 0x5e, 0x0d9: 0x0f, 0x0db: 0x5f, 0x0dc: 0x60, 0x0dd: 0x61, 0x0df: 0x62,
-       0x0e0: 0x04, 0x0e1: 0x05, 0x0e2: 0x06, 0x0e3: 0x07,
-       0x0ea: 0x08, 0x0eb: 0x09, 0x0ec: 0x09, 0x0ed: 0x0a, 0x0ef: 0x0b,
-       0x0f0: 0x11,
+       0x0c2: 0x55, 0x0c3: 0x01, 0x0c4: 0x02, 0x0c5: 0x03, 0x0c6: 0x56, 0x0c7: 0x04,
+       0x0c8: 0x05, 0x0ca: 0x57, 0x0cb: 0x58, 0x0cc: 0x06, 0x0cd: 0x07, 0x0ce: 0x08, 0x0cf: 0x09,
+       0x0d0: 0x0a, 0x0d1: 0x59, 0x0d2: 0x5a, 0x0d3: 0x0b, 0x0d6: 0x0c, 0x0d7: 0x5b,
+       0x0d8: 0x5c, 0x0d9: 0x0d, 0x0db: 0x5d, 0x0dc: 0x5e, 0x0dd: 0x5f, 0x0df: 0x60,
+       0x0e0: 0x02, 0x0e1: 0x03, 0x0e2: 0x04, 0x0e3: 0x05,
+       0x0ea: 0x06, 0x0eb: 0x07, 0x0ec: 0x07, 0x0ed: 0x08, 0x0ef: 0x09,
+       0x0f0: 0x0f,
        // Block 0x4, offset 0x100
-       0x120: 0x63, 0x121: 0x64, 0x124: 0x65, 0x125: 0x66, 0x126: 0x67, 0x127: 0x68,
-       0x128: 0x69, 0x129: 0x6a, 0x12a: 0x6b, 0x12b: 0x6c, 0x12c: 0x67, 0x12d: 0x6d, 0x12e: 0x6e, 0x12f: 0x6f,
-       0x131: 0x70, 0x132: 0x71, 0x133: 0x72, 0x134: 0x73, 0x135: 0x74, 0x137: 0x75,
-       0x138: 0x76, 0x139: 0x77, 0x13a: 0x78, 0x13b: 0x79, 0x13c: 0x7a, 0x13d: 0x7b, 0x13e: 0x7c, 0x13f: 0x7d,
+       0x120: 0x61, 0x121: 0x62, 0x124: 0x63, 0x125: 0x64, 0x126: 0x65, 0x127: 0x66,
+       0x128: 0x67, 0x129: 0x68, 0x12a: 0x69, 0x12b: 0x6a, 0x12c: 0x65, 0x12d: 0x6b, 0x12e: 0x6c, 0x12f: 0x6d,
+       0x131: 0x6e, 0x132: 0x6f, 0x133: 0x70, 0x134: 0x71, 0x135: 0x72, 0x137: 0x73,
+       0x138: 0x74, 0x139: 0x75, 0x13a: 0x76, 0x13b: 0x77, 0x13c: 0x78, 0x13d: 0x79, 0x13e: 0x7a, 0x13f: 0x7b,
        // Block 0x5, offset 0x140
-       0x140: 0x7e, 0x142: 0x7f, 0x143: 0x80, 0x144: 0x81, 0x145: 0x82, 0x146: 0x83, 0x147: 0x84,
-       0x14d: 0x85,
-       0x15c: 0x86, 0x15f: 0x87,
-       0x162: 0x88, 0x164: 0x89,
-       0x168: 0x8a, 0x169: 0x8b, 0x16c: 0x10, 0x16d: 0x8c, 0x16e: 0x8d, 0x16f: 0x8e,
-       0x170: 0x8f, 0x173: 0x90, 0x174: 0x91, 0x175: 0x11, 0x176: 0x12, 0x177: 0x92,
-       0x178: 0x13, 0x179: 0x14, 0x17a: 0x15, 0x17b: 0x16, 0x17c: 0x17, 0x17d: 0x18, 0x17e: 0x19, 0x17f: 0x1a,
+       0x140: 0x7c, 0x142: 0x7d, 0x143: 0x7e, 0x144: 0x7f, 0x145: 0x80, 0x146: 0x81, 0x147: 0x82,
+       0x14d: 0x83,
+       0x15c: 0x84, 0x15f: 0x85,
+       0x162: 0x86, 0x164: 0x87,
+       0x168: 0x88, 0x169: 0x89, 0x16c: 0x0e, 0x16d: 0x8a, 0x16e: 0x8b, 0x16f: 0x8c,
+       0x170: 0x8d, 0x173: 0x8e, 0x174: 0x8f, 0x175: 0x0f, 0x176: 0x10, 0x177: 0x90,
+       0x178: 0x11, 0x179: 0x12, 0x17a: 0x13, 0x17b: 0x14, 0x17c: 0x15, 0x17d: 0x16, 0x17e: 0x17, 0x17f: 0x18,
        // Block 0x6, offset 0x180
-       0x180: 0x93, 0x181: 0x94, 0x182: 0x95, 0x183: 0x96, 0x184: 0x1b, 0x185: 0x1c, 0x186: 0x97, 0x187: 0x98,
-       0x188: 0x99, 0x189: 0x1d, 0x18a: 0x1e, 0x18b: 0x9a, 0x18c: 0x9b,
-       0x191: 0x1f, 0x192: 0x20, 0x193: 0x9c,
-       0x1a8: 0x9d, 0x1a9: 0x9e, 0x1ab: 0x9f,
-       0x1b1: 0xa0, 0x1b3: 0xa1, 0x1b5: 0xa2, 0x1b7: 0xa3,
-       0x1ba: 0xa4, 0x1bb: 0xa5, 0x1bc: 0x21, 0x1bd: 0x22, 0x1be: 0x23, 0x1bf: 0xa6,
+       0x180: 0x91, 0x181: 0x92, 0x182: 0x93, 0x183: 0x94, 0x184: 0x19, 0x185: 0x1a, 0x186: 0x95, 0x187: 0x96,
+       0x188: 0x97, 0x189: 0x1b, 0x18a: 0x1c, 0x18b: 0x98, 0x18c: 0x99,
+       0x191: 0x1d, 0x192: 0x1e, 0x193: 0x9a,
+       0x1a8: 0x9b, 0x1a9: 0x9c, 0x1ab: 0x9d,
+       0x1b1: 0x9e, 0x1b3: 0x9f, 0x1b5: 0xa0, 0x1b7: 0xa1,
+       0x1ba: 0xa2, 0x1bb: 0xa3, 0x1bc: 0x1f, 0x1bd: 0x20, 0x1be: 0x21, 0x1bf: 0xa4,
        // Block 0x7, offset 0x1c0
-       0x1c0: 0xa7, 0x1c1: 0x24, 0x1c2: 0x25, 0x1c3: 0x26, 0x1c4: 0xa8, 0x1c5: 0xa9, 0x1c6: 0x27,
-       0x1c8: 0x28, 0x1c9: 0x29, 0x1ca: 0x2a, 0x1cb: 0x2b, 0x1cc: 0x2c, 0x1cd: 0x2d, 0x1ce: 0x2e, 0x1cf: 0x2f,
+       0x1c0: 0xa5, 0x1c1: 0x22, 0x1c2: 0x23, 0x1c3: 0x24, 0x1c4: 0xa6, 0x1c5: 0xa7, 0x1c6: 0x25,
+       0x1c8: 0x26, 0x1c9: 0x27, 0x1ca: 0x28, 0x1cb: 0x29, 0x1cc: 0x2a, 0x1cd: 0x2b, 0x1ce: 0x2c, 0x1cf: 0x2d,
        // Block 0x8, offset 0x200
-       0x219: 0xaa, 0x21b: 0xab, 0x21d: 0xac,
-       0x220: 0xad, 0x223: 0xae, 0x224: 0xaf, 0x225: 0xb0, 0x226: 0xb1, 0x227: 0xb2,
-       0x22a: 0xb3, 0x22b: 0xb4, 0x22f: 0xb5,
-       0x230: 0xb6, 0x231: 0xb6, 0x232: 0xb6, 0x233: 0xb6, 0x234: 0xb6, 0x235: 0xb6, 0x236: 0xb6, 0x237: 0xb6,
-       0x238: 0xb6, 0x239: 0xb6, 0x23a: 0xb6, 0x23b: 0xb6, 0x23c: 0xb6, 0x23d: 0xb6, 0x23e: 0xb6, 0x23f: 0xb6,
+       0x219: 0xa8, 0x21b: 0xa9, 0x21d: 0xaa,
+       0x220: 0xab, 0x223: 0xac, 0x224: 0xad, 0x225: 0xae, 0x226: 0xaf, 0x227: 0xb0,
+       0x22a: 0xb1, 0x22b: 0xb2, 0x22f: 0xb3,
+       0x230: 0xb4, 0x231: 0xb4, 0x232: 0xb4, 0x233: 0xb4, 0x234: 0xb4, 0x235: 0xb4, 0x236: 0xb4, 0x237: 0xb4,
+       0x238: 0xb4, 0x239: 0xb4, 0x23a: 0xb4, 0x23b: 0xb4, 0x23c: 0xb4, 0x23d: 0xb4, 0x23e: 0xb4, 0x23f: 0xb4,
        // Block 0x9, offset 0x240
-       0x240: 0xb6, 0x241: 0xb6, 0x242: 0xb6, 0x243: 0xb6, 0x244: 0xb6, 0x245: 0xb6, 0x246: 0xb6, 0x247: 0xb6,
-       0x248: 0xb6, 0x249: 0xb6, 0x24a: 0xb6, 0x24b: 0xb6, 0x24c: 0xb6, 0x24d: 0xb6, 0x24e: 0xb6, 0x24f: 0xb6,
-       0x250: 0xb6, 0x251: 0xb6, 0x252: 0xb6, 0x253: 0xb6, 0x254: 0xb6, 0x255: 0xb6, 0x256: 0xb6, 0x257: 0xb6,
-       0x258: 0xb6, 0x259: 0xb6, 0x25a: 0xb6, 0x25b: 0xb6, 0x25c: 0xb6, 0x25d: 0xb6, 0x25e: 0xb6, 0x25f: 0xb6,
-       0x260: 0xb6, 0x261: 0xb6, 0x262: 0xb6, 0x263: 0xb6, 0x264: 0xb6, 0x265: 0xb6, 0x266: 0xb6, 0x267: 0xb6,
-       0x268: 0xb6, 0x269: 0xb6, 0x26a: 0xb6, 0x26b: 0xb6, 0x26c: 0xb6, 0x26d: 0xb6, 0x26e: 0xb6, 0x26f: 0xb6,
-       0x270: 0xb6, 0x271: 0xb6, 0x272: 0xb6, 0x273: 0xb6, 0x274: 0xb6, 0x275: 0xb6, 0x276: 0xb6, 0x277: 0xb6,
-       0x278: 0xb6, 0x279: 0xb6, 0x27a: 0xb6, 0x27b: 0xb6, 0x27c: 0xb6, 0x27d: 0xb6, 0x27e: 0xb6, 0x27f: 0xb6,
+       0x240: 0xb4, 0x241: 0xb4, 0x242: 0xb4, 0x243: 0xb4, 0x244: 0xb4, 0x245: 0xb4, 0x246: 0xb4, 0x247: 0xb4,
+       0x248: 0xb4, 0x249: 0xb4, 0x24a: 0xb4, 0x24b: 0xb4, 0x24c: 0xb4, 0x24d: 0xb4, 0x24e: 0xb4, 0x24f: 0xb4,
+       0x250: 0xb4, 0x251: 0xb4, 0x252: 0xb4, 0x253: 0xb4, 0x254: 0xb4, 0x255: 0xb4, 0x256: 0xb4, 0x257: 0xb4,
+       0x258: 0xb4, 0x259: 0xb4, 0x25a: 0xb4, 0x25b: 0xb4, 0x25c: 0xb4, 0x25d: 0xb4, 0x25e: 0xb4, 0x25f: 0xb4,
+       0x260: 0xb4, 0x261: 0xb4, 0x262: 0xb4, 0x263: 0xb4, 0x264: 0xb4, 0x265: 0xb4, 0x266: 0xb4, 0x267: 0xb4,
+       0x268: 0xb4, 0x269: 0xb4, 0x26a: 0xb4, 0x26b: 0xb4, 0x26c: 0xb4, 0x26d: 0xb4, 0x26e: 0xb4, 0x26f: 0xb4,
+       0x270: 0xb4, 0x271: 0xb4, 0x272: 0xb4, 0x273: 0xb4, 0x274: 0xb4, 0x275: 0xb4, 0x276: 0xb4, 0x277: 0xb4,
+       0x278: 0xb4, 0x279: 0xb4, 0x27a: 0xb4, 0x27b: 0xb4, 0x27c: 0xb4, 0x27d: 0xb4, 0x27e: 0xb4, 0x27f: 0xb4,
        // Block 0xa, offset 0x280
-       0x280: 0xb6, 0x281: 0xb6, 0x282: 0xb6, 0x283: 0xb6, 0x284: 0xb6, 0x285: 0xb6, 0x286: 0xb6, 0x287: 0xb6,
-       0x288: 0xb6, 0x289: 0xb6, 0x28a: 0xb6, 0x28b: 0xb6, 0x28c: 0xb6, 0x28d: 0xb6, 0x28e: 0xb6, 0x28f: 0xb6,
-       0x290: 0xb6, 0x291: 0xb6, 0x292: 0xb6, 0x293: 0xb6, 0x294: 0xb6, 0x295: 0xb6, 0x296: 0xb6, 0x297: 0xb6,
-       0x298: 0xb6, 0x299: 0xb6, 0x29a: 0xb6, 0x29b: 0xb6, 0x29c: 0xb6, 0x29d: 0xb6, 0x29e: 0xb7,
+       0x280: 0xb4, 0x281: 0xb4, 0x282: 0xb4, 0x283: 0xb4, 0x284: 0xb4, 0x285: 0xb4, 0x286: 0xb4, 0x287: 0xb4,
+       0x288: 0xb4, 0x289: 0xb4, 0x28a: 0xb4, 0x28b: 0xb4, 0x28c: 0xb4, 0x28d: 0xb4, 0x28e: 0xb4, 0x28f: 0xb4,
+       0x290: 0xb4, 0x291: 0xb4, 0x292: 0xb4, 0x293: 0xb4, 0x294: 0xb4, 0x295: 0xb4, 0x296: 0xb4, 0x297: 0xb4,
+       0x298: 0xb4, 0x299: 0xb4, 0x29a: 0xb4, 0x29b: 0xb4, 0x29c: 0xb4, 0x29d: 0xb4, 0x29e: 0xb5,
        // Block 0xb, offset 0x2c0
-       0x2e4: 0x30, 0x2e5: 0x31, 0x2e6: 0x32, 0x2e7: 0x33,
-       0x2e8: 0x34, 0x2e9: 0x35, 0x2ea: 0x36, 0x2eb: 0x37, 0x2ec: 0x38, 0x2ed: 0x39, 0x2ee: 0x3a, 0x2ef: 0x3b,
-       0x2f0: 0x3c, 0x2f1: 0x3d, 0x2f2: 0x3e, 0x2f3: 0x3f, 0x2f4: 0x40, 0x2f5: 0x41, 0x2f6: 0x42, 0x2f7: 0x43,
-       0x2f8: 0x44, 0x2f9: 0x45, 0x2fa: 0x46, 0x2fb: 0x47, 0x2fc: 0xb8, 0x2fd: 0x48, 0x2fe: 0x49, 0x2ff: 0xb9,
+       0x2e4: 0x2e, 0x2e5: 0x2f, 0x2e6: 0x30, 0x2e7: 0x31,
+       0x2e8: 0x32, 0x2e9: 0x33, 0x2ea: 0x34, 0x2eb: 0x35, 0x2ec: 0x36, 0x2ed: 0x37, 0x2ee: 0x38, 0x2ef: 0x39,
+       0x2f0: 0x3a, 0x2f1: 0x3b, 0x2f2: 0x3c, 0x2f3: 0x3d, 0x2f4: 0x3e, 0x2f5: 0x3f, 0x2f6: 0x40, 0x2f7: 0x41,
+       0x2f8: 0x42, 0x2f9: 0x43, 0x2fa: 0x44, 0x2fb: 0x45, 0x2fc: 0xb6, 0x2fd: 0x46, 0x2fe: 0x47, 0x2ff: 0xb7,
        // Block 0xc, offset 0x300
-       0x307: 0xba,
-       0x328: 0xbb,
+       0x307: 0xb8,
+       0x328: 0xb9,
        // Block 0xd, offset 0x340
-       0x341: 0xad, 0x342: 0xbc,
+       0x341: 0xab, 0x342: 0xba,
        // Block 0xe, offset 0x380
-       0x385: 0xbd, 0x386: 0xbe, 0x387: 0xbf,
-       0x389: 0xc0,
-       0x390: 0xc1, 0x391: 0xc2, 0x392: 0xc3, 0x393: 0xc4, 0x394: 0xc5, 0x395: 0xc6, 0x396: 0xc7, 0x397: 0xc8,
-       0x398: 0xc9, 0x399: 0xca, 0x39a: 0x4a, 0x39b: 0xcb, 0x39c: 0xcc, 0x39d: 0xcd, 0x39e: 0xce, 0x39f: 0x4b,
+       0x385: 0xbb, 0x386: 0xbc, 0x387: 0xbd,
+       0x389: 0xbe,
+       0x390: 0xbf, 0x391: 0xc0, 0x392: 0xc1, 0x393: 0xc2, 0x394: 0xc3, 0x395: 0xc4, 0x396: 0xc5, 0x397: 0xc6,
+       0x398: 0xc7, 0x399: 0xc8, 0x39a: 0x48, 0x39b: 0xc9, 0x39c: 0xca, 0x39d: 0xcb, 0x39e: 0xcc, 0x39f: 0x49,
        // Block 0xf, offset 0x3c0
-       0x3c4: 0x4c, 0x3c5: 0xcf, 0x3c6: 0xd0,
-       0x3c8: 0x4d, 0x3c9: 0xd1,
+       0x3c4: 0x4a, 0x3c5: 0xcd, 0x3c6: 0xce,
+       0x3c8: 0x4b, 0x3c9: 0xcf,
        // Block 0x10, offset 0x400
-       0x420: 0x4e, 0x421: 0x4f, 0x422: 0x50, 0x423: 0x51, 0x424: 0x52, 0x425: 0x53, 0x426: 0x54, 0x427: 0x55,
-       0x428: 0x56,
+       0x420: 0x4c, 0x421: 0x4d, 0x422: 0x4e, 0x423: 0x4f, 0x424: 0x50, 0x425: 0x51, 0x426: 0x52, 0x427: 0x53,
+       0x428: 0x54,
        // Block 0x11, offset 0x440
-       0x450: 0x0c, 0x451: 0x0d,
-       0x45d: 0x0e, 0x45f: 0x0f,
-       0x46f: 0x10,
+       0x450: 0x0a, 0x451: 0x0b,
+       0x45d: 0x0c, 0x45f: 0x0d,
+       0x46f: 0x0e,
 }
 
-var nfkcTrie = trie{nfkcLookup[:], nfkcValues[:], nfkcSparseValues[:], nfkcSparseOffset[:], 87}
+var nfkcTrie = trie{nfkcLookup[:], nfkcValues[:], nfkcSparseValues[:], nfkcSparseOffset[:], 85}
 
 // recompMap: 7448 bytes (entries only)
 var recompMap = map[uint32]rune{
index 93cb9c3390324524ec4ab5400956a162dc1932a1..82267a8d37fb440bdd96fe2b2cf2e968ee0800d6 100644 (file)
@@ -23,7 +23,7 @@ type trie struct {
 // the value for b is by r.value + (b - r.lo) * stride.
 func (t *trie) lookupValue(n uint8, b byte) uint16 {
        if n < t.cutoff {
-               return t.values[uint16(n)<<6+uint16(b&maskx)]
+               return t.values[uint16(n)<<6+uint16(b)]
        }
        offset := t.sparseOffset[n-t.cutoff]
        header := t.sparse[offset]
@@ -53,11 +53,6 @@ const (
        t5 = 0xF8 // 1111 1000
        t6 = 0xFC // 1111 1100
        te = 0xFE // 1111 1110
-
-       maskx = 0x3F // 0011 1111
-       mask2 = 0x1F // 0001 1111
-       mask3 = 0x0F // 0000 1111
-       mask4 = 0x07 // 0000 0111
 )
 
 // lookup returns the trie value for the first UTF-8 encoding in s and
@@ -89,7 +84,7 @@ func (t *trie) lookup(s []byte) (v uint16, sz int) {
                if c1 < tx || t2 <= c1 {
                        return 0, 1
                }
-               o := uint16(i)<<6 + uint16(c1)&maskx
+               o := uint16(i)<<6 + uint16(c1)
                i = t.index[o]
                c2 := s[2]
                if c2 < tx || t2 <= c2 {
@@ -105,13 +100,13 @@ func (t *trie) lookup(s []byte) (v uint16, sz int) {
                if c1 < tx || t2 <= c1 {
                        return 0, 1
                }
-               o := uint16(i)<<6 + uint16(c1)&maskx
+               o := uint16(i)<<6 + uint16(c1)
                i = t.index[o]
                c2 := s[2]
                if c2 < tx || t2 <= c2 {
                        return 0, 2
                }
-               o = uint16(i)<<6 + uint16(c2)&maskx
+               o = uint16(i)<<6 + uint16(c2)
                i = t.index[o]
                c3 := s[3]
                if c3 < tx || t2 <= c3 {
@@ -152,7 +147,7 @@ func (t *trie) lookupString(s string) (v uint16, sz int) {
                if c1 < tx || t2 <= c1 {
                        return 0, 1
                }
-               o := uint16(i)<<6 + uint16(c1)&maskx
+               o := uint16(i)<<6 + uint16(c1)
                i = t.index[o]
                c2 := s[2]
                if c2 < tx || t2 <= c2 {
@@ -168,13 +163,13 @@ func (t *trie) lookupString(s string) (v uint16, sz int) {
                if c1 < tx || t2 <= c1 {
                        return 0, 1
                }
-               o := uint16(i)<<6 + uint16(c1)&maskx
+               o := uint16(i)<<6 + uint16(c1)
                i = t.index[o]
                c2 := s[2]
                if c2 < tx || t2 <= c2 {
                        return 0, 2
                }
-               o = uint16(i)<<6 + uint16(c2)&maskx
+               o = uint16(i)<<6 + uint16(c2)
                i = t.index[o]
                c3 := s[3]
                if c3 < tx || t2 <= c3 {
@@ -200,11 +195,11 @@ func (t *trie) lookupUnsafe(s []byte) uint16 {
        if c0 < t3 {
                return t.lookupValue(i, s[1])
        }
-       i = t.index[uint16(i)<<6+uint16(s[1])&maskx]
+       i = t.index[uint16(i)<<6+uint16(s[1])]
        if c0 < t4 {
                return t.lookupValue(i, s[2])
        }
-       i = t.index[uint16(i)<<6+uint16(s[2])&maskx]
+       i = t.index[uint16(i)<<6+uint16(s[2])]
        if c0 < t5 {
                return t.lookupValue(i, s[3])
        }
@@ -225,11 +220,11 @@ func (t *trie) lookupStringUnsafe(s string) uint16 {
        if c0 < t3 {
                return t.lookupValue(i, s[1])
        }
-       i = t.index[uint16(i)<<6+uint16(s[1])&maskx]
+       i = t.index[uint16(i)<<6+uint16(s[1])]
        if c0 < t4 {
                return t.lookupValue(i, s[2])
        }
-       i = t.index[uint16(i)<<6+uint16(s[2])&maskx]
+       i = t.index[uint16(i)<<6+uint16(s[2])]
        if c0 < t5 {
                return t.lookupValue(i, s[3])
        }
index c457c9d974a99385ed5c68bacd8732ed43780251..1a75cc705689f79cd1ace4328815ea2932298278 100644 (file)
@@ -96,13 +96,17 @@ func TestLookup(t *testing.T) {
        }
        for i, tt := range tests {
                v, sz := testdata.lookup(tt.bytes)
-               if int(v) != 0 {
+               if v != 0 {
                        t.Errorf("lookup of illegal rune, case %d: found value %#x, expected 0", i, v)
                }
                if sz != tt.size {
                        t.Errorf("lookup of illegal rune, case %d: found size %d, expected %d", i, sz, tt.size)
                }
        }
+       // Verify defaults.
+       if v, _ := testdata.lookup([]byte{0xC1, 0x8C}); v != 0 {
+               t.Errorf("lookup of non-existing rune should be 0; found %X", v)
+       }
 }
 
 func TestLookupUnsafe(t *testing.T) {
index 7f6276096c56e0ed6b97ab900db4bc97c4279edd..d6c832d46a1435dc3f3c8a74f8bb30f865cd2267 100644 (file)
@@ -4,7 +4,7 @@
 
 package norm
 
-var testRunes = []rune{1, 12, 127, 128, 256, 2047, 2048, 2457, 65535, 65536, 65793, 1114111, 512, 513, 514, 528, 533}
+var testRunes = []int32{1, 12, 127, 128, 256, 2047, 2048, 2457, 65535, 65536, 65793, 1114111, 512, 513, 514, 528, 533}
 
 // testdataValues: 192 entries, 384 bytes
 // Block 2 is the null block.
@@ -62,24 +62,24 @@ var testdataLookup = [640]uint8{
        // Block 0x1, offset 0x40
        // Block 0x2, offset 0x80
        // Block 0x3, offset 0xc0
-       0x0c2: 0x03, 0x0c4: 0x04,
-       0x0c8: 0x05,
-       0x0df: 0x06,
-       0x0e0: 0x04,
-       0x0ef: 0x05,
-       0x0f0: 0x07, 0x0f4: 0x09,
+       0x0c2: 0x01, 0x0c4: 0x02,
+       0x0c8: 0x03,
+       0x0df: 0x04,
+       0x0e0: 0x02,
+       0x0ef: 0x03,
+       0x0f0: 0x05, 0x0f4: 0x07,
        // Block 0x4, offset 0x100
-       0x120: 0x07, 0x126: 0x08,
+       0x120: 0x05, 0x126: 0x06,
        // Block 0x5, offset 0x140
-       0x17f: 0x09,
+       0x17f: 0x07,
        // Block 0x6, offset 0x180
-       0x180: 0x0a, 0x184: 0x0b,
+       0x180: 0x08, 0x184: 0x09,
        // Block 0x7, offset 0x1c0
-       0x1d0: 0x06,
+       0x1d0: 0x04,
        // Block 0x8, offset 0x200
-       0x23f: 0x0c,
+       0x23f: 0x0a,
        // Block 0x9, offset 0x240
-       0x24f: 0x08,
+       0x24f: 0x06,
 }
 
-var testdataTrie = trie{testdataLookup[:], testdataValues[:], testdataSparseValues[:], testdataSparseOffset[:], 3}
+var testdataTrie = trie{testdataLookup[:], testdataValues[:], testdataSparseValues[:], testdataSparseOffset[:], 1}
index 2e275a06254f56a142af4aef313b7a9ea368bb51..1780ac7129dd45ba9c6a70b454eb41e6e11012c3 100644 (file)
@@ -19,8 +19,11 @@ import (
        "unicode/utf8"
 )
 
-const blockSize = 64
-const maxSparseEntries = 16
+const (
+       blockSize        = 64
+       blockOffset      = 2 // Subtract two blocks to compensate for the 0x80 added to continuation bytes.
+       maxSparseEntries = 16
+)
 
 // Intermediate trie structure
 type trieNode struct {
@@ -157,7 +160,7 @@ func computeOffsets(index *nodeIndex, n *trieNode) int {
        if n.isInternal() {
                v, ok := index.lookupBlockIdx[h]
                if !ok {
-                       v = len(index.lookupBlocks)
+                       v = len(index.lookupBlocks) - blockOffset
                        index.lookupBlocks = append(index.lookupBlocks, n)
                        index.lookupBlockIdx[h] = v
                }
@@ -166,7 +169,7 @@ func computeOffsets(index *nodeIndex, n *trieNode) int {
                v, ok := index.valueBlockIdx[h]
                if !ok {
                        if c := n.countSparseEntries(); c > maxSparseEntries {
-                               v = len(index.valueBlocks)
+                               v = len(index.valueBlocks) - blockOffset
                                index.valueBlocks = append(index.valueBlocks, n)
                                index.valueBlockIdx[h] = v
                        } else {
@@ -295,7 +298,7 @@ func (t *trieNode) printTables(name string) int {
        }
        fmt.Print("\n}\n\n")
 
-       cutoff := len(index.valueBlocks)
+       cutoff := len(index.valueBlocks) - blockOffset
        ni := len(index.lookupBlocks) * blockSize
        fmt.Printf("// %sLookup: %d bytes\n", name, ni)
        fmt.Printf("// Block 0 is the null block.\n")