From: Marcel van Lohuizen Date: Sat, 28 Jul 2012 16:44:14 +0000 (+0200) Subject: exp/locale/collate: changed trie in first step towards support for multiple locales. X-Git-Tag: go1.1rc2~2764 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=601045e87a27178048c82c01e00c64bcb5bc8810;p=gostls13.git exp/locale/collate: changed trie in first step towards support for multiple locales. - Allow handles into the trie for different locales. Multiple tables share the same try to allow for reuse of blocks. - Significantly improved memory footprint and reduced allocations of trieNodes. This speeds up generation by about 30% and allows keeping trieNodes around for multiple locales during generation. - Renamed print method to fprint. R=r CC=golang-dev https://golang.org/cl/6408052 --- diff --git a/src/pkg/exp/locale/collate/build/builder.go b/src/pkg/exp/locale/collate/build/builder.go index 4b9fda6e9d..1528b9d0ce 100644 --- a/src/pkg/exp/locale/collate/build/builder.go +++ b/src/pkg/exp/locale/collate/build/builder.go @@ -66,6 +66,7 @@ func (e *entry) contractionStarter() bool { // tables using Add and AddTailoring before making any call to Build. This allows // Builder to ensure that a root table can support tailorings for each locale. type Builder struct { + index *trieBuilder entryMap map[string]*entry entry []*entry t *table @@ -76,6 +77,7 @@ type Builder struct { // NewBuilder returns a new Builder. func NewBuilder() *Builder { b := &Builder{ + index: newTrieBuilder(), entryMap: make(map[string]*entry), } return b @@ -218,7 +220,7 @@ func (b *Builder) Print(w io.Writer) (int, error) { return 0, err } // TODO: support multiple locales - n, _, err := t.print(w, "root") + n, _, err := t.fprint(w, "root") return n, err } @@ -510,7 +512,8 @@ func (b *Builder) buildTrie() { t.insert(e.runes[0], ce) } } - i, err := t.generate() + b.t.root = b.index.addTrie(t) + i, err := b.index.generate() b.t.index = *i b.error(err) } diff --git a/src/pkg/exp/locale/collate/build/table.go b/src/pkg/exp/locale/collate/build/table.go index 5e1d59a832..91ed51b6de 100644 --- a/src/pkg/exp/locale/collate/build/table.go +++ b/src/pkg/exp/locale/collate/build/table.go @@ -14,6 +14,7 @@ import ( // It implements the non-exported interface collate.tableInitializer type table struct { index trie // main trie + root *trieHandle // expansion info expandElem []uint32 @@ -32,6 +33,10 @@ func (t *table) TrieValues() []uint32 { return t.index.values } +func (t *table) FirstBlockOffsets() (i, v uint16) { + return t.root.lookupStart, t.root.valueStart +} + func (t *table) ExpandElems() []uint32 { return t.expandElem } @@ -51,7 +56,7 @@ func (t *table) MaxContractLen() int { // print writes the table as Go compilable code to w. It prefixes the // variable names with name. It returns the number of bytes written // and the size of the resulting table. -func (t *table) print(w io.Writer, name string) (n, size int, err error) { +func (t *table) fprint(w io.Writer, name string) (n, size int, err error) { update := func(nn, sz int, e error) { n += nn if err == nil { @@ -66,7 +71,7 @@ func (t *table) print(w io.Writer, name string) (n, size int, err error) { // Write main table. size += int(reflect.TypeOf(*t).Size()) p("var %sTable = table{\n", name) - update(t.index.printStruct(w, name)) + update(t.index.printStruct(w, t.root, name)) p(",\n") p("%sExpandElem[:],\n", name) update(t.contractTries.printStruct(w, name)) diff --git a/src/pkg/exp/locale/collate/build/trie.go b/src/pkg/exp/locale/collate/build/trie.go index 480cc58d15..d251a39653 100644 --- a/src/pkg/exp/locale/collate/build/trie.go +++ b/src/pkg/exp/locale/collate/build/trie.go @@ -15,7 +15,6 @@ import ( "fmt" "hash/fnv" "io" - "log" "reflect" ) @@ -24,6 +23,11 @@ const ( blockOffset = 2 // Substract 2 blocks to compensate for the 0x80 added to continuation bytes. ) +type trieHandle struct { + lookupStart uint16 // offset in table for first byte + valueStart uint16 // offset in table for first byte +} + type trie struct { index []uint16 values []uint32 @@ -31,181 +35,189 @@ type trie struct { // trieNode is the intermediate trie structure used for generating a trie. type trieNode struct { - table [256]*trieNode - value int64 + index []*trieNode + value []uint32 b byte - leaf bool + ref uint16 } func newNode() *trieNode { - return new(trieNode) + return &trieNode{ + index: make([]*trieNode, 64), + value: make([]uint32, 128), // root node size is 128 instead of 64 + } } func (n *trieNode) isInternal() bool { - internal := true - for i := 0; i < 256; i++ { - if nn := n.table[i]; nn != nil { - if !internal && !nn.leaf { - log.Fatalf("trie:isInternal: node contains both leaf and non-leaf children (%v)", n) - } - internal = internal && !nn.leaf - } - } - return internal + return n.value != nil } func (n *trieNode) insert(r rune, value uint32) { - for _, b := range []byte(string(r)) { - if n.leaf { - log.Fatalf("trie:insert: node (%#v) should not be a leaf", n) + const maskx = 0x3F // mask out two most-significant bits + str := string(r) + if len(str) == 1 { + n.value[str[0]] = value + return + } + for i := 0; i < len(str)-1; i++ { + b := str[i] & maskx + if n.index == nil { + n.index = make([]*trieNode, blockSize) } - nn := n.table[b] + nn := n.index[b] if nn == nil { - nn = newNode() + nn = &trieNode{} nn.b = b - n.table[b] = nn + n.index[b] = nn } n = nn } - n.value = int64(value) - n.leaf = true + if n.value == nil { + n.value = make([]uint32, blockSize) + } + b := str[len(str)-1] & maskx + n.value[b] = value } -type nodeIndex struct { +type trieBuilder struct { + t *trie + + roots []*trieHandle + lookupBlocks []*trieNode valueBlocks []*trieNode - lookupBlockIdx map[uint32]int64 - valueBlockIdx map[uint32]int64 + lookupBlockIdx map[uint32]*trieNode + valueBlockIdx map[uint32]*trieNode } -func newIndex() *nodeIndex { - index := &nodeIndex{} +func newTrieBuilder() *trieBuilder { + index := &trieBuilder{} index.lookupBlocks = make([]*trieNode, 0) index.valueBlocks = make([]*trieNode, 0) - index.lookupBlockIdx = make(map[uint32]int64) - index.valueBlockIdx = make(map[uint32]int64) + index.lookupBlockIdx = make(map[uint32]*trieNode) + index.valueBlockIdx = make(map[uint32]*trieNode) + // The third nil is the default null block. The other two blocks + // are used to guarantee an offset of at least 3 for each block. + index.lookupBlocks = append(index.lookupBlocks, nil, nil, nil) + index.t = &trie{} return index } -func computeOffsets(index *nodeIndex, n *trieNode) int64 { - if n.leaf { - return n.value - } +func (b *trieBuilder) computeOffsets(n *trieNode) *trieNode { hasher := fnv.New32() - // We only index continuation bytes. - for i := 0; i < blockSize; i++ { - v := int64(0) - if nn := n.table[0x80+i]; nn != nil { - v = computeOffsets(index, nn) + if n.index != nil { + for i, nn := range n.index { + v := uint16(0) + if nn != nil { + nn = b.computeOffsets(nn) + n.index[i] = nn + v = nn.ref + } + hasher.Write([]byte{byte(v >> 8), byte(v)}) } - hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)}) - } - h := hasher.Sum32() - if n.isInternal() { - v, ok := index.lookupBlockIdx[h] + h := hasher.Sum32() + nn, ok := b.lookupBlockIdx[h] if !ok { - v = int64(len(index.lookupBlocks)) - blockOffset - index.lookupBlocks = append(index.lookupBlocks, n) - index.lookupBlockIdx[h] = v + n.ref = uint16(len(b.lookupBlocks)) - blockOffset + b.lookupBlocks = append(b.lookupBlocks, n) + b.lookupBlockIdx[h] = n + } else { + n = nn } - n.value = v } else { - v, ok := index.valueBlockIdx[h] + for _, v := range n.value { + hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)}) + } + h := hasher.Sum32() + nn, ok := b.valueBlockIdx[h] if !ok { - v = int64(len(index.valueBlocks)) - blockOffset - index.valueBlocks = append(index.valueBlocks, n) - index.valueBlockIdx[h] = v + n.ref = uint16(len(b.valueBlocks)) - blockOffset + b.valueBlocks = append(b.valueBlocks, n) + b.valueBlockIdx[h] = n + } else { + n = nn } - n.value = v } - return n.value + return n } -func genValueBlock(t *trie, n *trieNode, offset int) error { - for i := 0; i < blockSize; i++ { - v := int64(0) - if nn := n.table[i+offset]; nn != nil { - v = nn.value - } - if v >= 1<<32 { - return fmt.Errorf("value %d at index %d does not fit in uint32", v, len(t.values)) - } - t.values = append(t.values, uint32(v)) +func (b *trieBuilder) addStartValueBlock(n *trieNode) uint16 { + hasher := fnv.New32() + for _, v := range n.value[:2*blockSize] { + hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)}) } - return nil + h := hasher.Sum32() + nn, ok := b.valueBlockIdx[h] + if !ok { + n.ref = uint16(len(b.valueBlocks)) + b.valueBlocks = append(b.valueBlocks, n) + // Add a dummy block to accommodate the double block size. + b.valueBlocks = append(b.valueBlocks, nil) + b.valueBlockIdx[h] = n + } else { + n = nn + } + return n.ref } -func genLookupBlock(t *trie, n *trieNode, offset int) error { - for i := 0; i < blockSize; i++ { - v := int64(0) - if nn := n.table[i+offset]; nn != nil { - v = nn.value - } - if v >= 1<<16 { - return fmt.Errorf("value %d at index %d does not fit in uint16", v, len(t.index)) +func genValueBlock(t *trie, n *trieNode) { + if n != nil { + for _, v := range n.value { + t.values = append(t.values, v) } - t.index = append(t.index, uint16(v)) } - return nil } -// generate generates and returns the trie for n. -func (n *trieNode) generate() (t *trie, err error) { - seterr := func(e error) { - if err == nil { - err = e +func genLookupBlock(t *trie, n *trieNode) { + for _, nn := range n.index { + v := uint16(0) + if nn != nil { + v = nn.ref } + t.index = append(t.index, v) } - index := newIndex() - // Values for 7-bit ASCII are stored in the first of two blocks, followed by a nil block. - index.valueBlocks = append(index.valueBlocks, nil, nil, nil) - // First byte of multi-byte UTF-8 codepoints are indexed in 4th block. - index.lookupBlocks = append(index.lookupBlocks, nil, nil, nil, nil) - // Index starter bytes of multi-byte UTF-8. - for i := 0xC0; i < 0x100; i++ { - if n.table[i] != nil { - computeOffsets(index, n.table[i]) - } +} + +func (b *trieBuilder) addTrie(n *trieNode) *trieHandle { + h := &trieHandle{} + b.roots = append(b.roots, h) + h.valueStart = b.addStartValueBlock(n) + if len(b.roots) == 1 { + // We insert a null block after the first start value block. + // This ensures that continuation bytes UTF-8 sequences of length + // greater than 2 will automatically hit a null block if there + // was an undefined entry. + b.valueBlocks = append(b.valueBlocks, nil) } - t = &trie{} - seterr(genValueBlock(t, n, 0)) - seterr(genValueBlock(t, n, 64)) - seterr(genValueBlock(t, newNode(), 0)) - for i := 3; i < len(index.valueBlocks); i++ { - seterr(genValueBlock(t, index.valueBlocks[i], 0x80)) + n = b.computeOffsets(n) + // Offset by one extra block as the first byte starts at 0xC0 instead of 0x80. + h.lookupStart = n.ref - 1 + return h +} + +// generate generates and returns the trie for n. +func (b *trieBuilder) generate() (t *trie, err error) { + t = b.t + if len(b.valueBlocks) >= 1<<16 { + return nil, fmt.Errorf("maximum number of value blocks exceeded (%d > %d)", len(b.valueBlocks), 1<<16) } - if len(index.valueBlocks) >= 1<<16 { - seterr(fmt.Errorf("maximum number of value blocks exceeded (%d > %d)", len(index.valueBlocks), 1<<16)) - return + if len(b.lookupBlocks) >= 1<<16 { + return nil, fmt.Errorf("maximum number of lookup blocks exceeded (%d > %d)", len(b.lookupBlocks), 1<<16) } - seterr(genLookupBlock(t, newNode(), 0)) - seterr(genLookupBlock(t, newNode(), 0)) - seterr(genLookupBlock(t, newNode(), 0)) - seterr(genLookupBlock(t, n, 0xC0)) - for i := 4; i < len(index.lookupBlocks); i++ { - seterr(genLookupBlock(t, index.lookupBlocks[i], 0x80)) + genValueBlock(t, b.valueBlocks[0]) + genValueBlock(t, &trieNode{value: make([]uint32, 64)}) + for i := 2; i < len(b.valueBlocks); i++ { + genValueBlock(t, b.valueBlocks[i]) } - return -} - -// print writes a compilable trie to w. It returns the number of characters -// printed and the size of the generated structure in bytes. -func (t *trie) print(w io.Writer, name string) (n, size int, err error) { - update3 := func(nn, sz int, e error) { - n += nn - if err == nil { - err = e - } - size += sz + n := &trieNode{index: make([]*trieNode, 64)} + genLookupBlock(t, n) + genLookupBlock(t, n) + genLookupBlock(t, n) + for i := 3; i < len(b.lookupBlocks); i++ { + genLookupBlock(t, b.lookupBlocks[i]) } - update2 := func(nn int, e error) { update3(nn, 0, e) } - - update3(t.printArrays(w, name)) - update2(fmt.Fprintf(w, "var %sTrie = ", name)) - update3(t.printStruct(w, name)) - update2(fmt.Fprintln(w)) - return + return b.t, nil } func (t *trie) printArrays(w io.Writer, name string) (n, size int, err error) { @@ -261,8 +273,9 @@ func (t *trie) printArrays(w io.Writer, name string) (n, size int, err error) { return n, nv*4 + ni*2, err } -func (t *trie) printStruct(w io.Writer, name string) (n, sz int, err error) { - n, err = fmt.Fprintf(w, "trie{ %sLookup[:], %sValues[:]}", name, name) +func (t *trie) printStruct(w io.Writer, handle *trieHandle, name string) (n, sz int, err error) { + const msg = "trie{ %sLookup[%d:], %sValues[%d:], %sLookup[:], %sValues[:]}" + n, err = fmt.Fprintf(w, msg, name, handle.lookupStart*blockSize, name, handle.valueStart*blockSize, name, name) sz += int(reflect.TypeOf(trie{}).Size()) return } diff --git a/src/pkg/exp/locale/collate/build/trie_test.go b/src/pkg/exp/locale/collate/build/trie_test.go index 3ecbd841c5..11da56664d 100644 --- a/src/pkg/exp/locale/collate/build/trie_test.go +++ b/src/pkg/exp/locale/collate/build/trie_test.go @@ -6,6 +6,7 @@ package build import ( "bytes" + "fmt" "testing" ) @@ -24,7 +25,9 @@ func makeTestTrie(t *testing.T) trie { for i, r := range testRunes { n.insert(r, uint32(i)) } - tr, err := n.generate() + idx := newTrieBuilder() + idx.addTrie(n) + tr, err := idx.generate() if err != nil { t.Errorf(err.Error()) } @@ -34,9 +37,11 @@ func makeTestTrie(t *testing.T) trie { func TestGenerateTrie(t *testing.T) { testdata := makeTestTrie(t) buf := &bytes.Buffer{} - testdata.print(buf, "test") + testdata.printArrays(buf, "test") + fmt.Fprintf(buf, "var testTrie = ") + testdata.printStruct(buf, &trieHandle{19, 0}, "test") if output != buf.String() { - t.Errorf("output differs") + t.Error("output differs") } } @@ -79,25 +84,24 @@ var testLookup = [640]uint16 { // Block 0x1, offset 0x40 // Block 0x2, offset 0x80 // Block 0x3, offset 0xc0 - 0x0c2:0x01, 0x0c4:0x02, - 0x0c8:0x03, - 0x0df:0x04, - 0x0e0:0x02, - 0x0ef:0x03, - 0x0f0:0x05, 0x0f4:0x07, + 0x0e0:0x05, 0x0e6:0x06, // Block 0x4, offset 0x100 - 0x120:0x05, 0x126:0x06, + 0x13f:0x07, // Block 0x5, offset 0x140 - 0x17f:0x07, + 0x140:0x08, 0x144:0x09, // Block 0x6, offset 0x180 - 0x180:0x08, 0x184:0x09, + 0x190:0x03, // Block 0x7, offset 0x1c0 - 0x1d0:0x04, + 0x1ff:0x0a, // Block 0x8, offset 0x200 - 0x23f:0x0a, + 0x20f:0x05, // Block 0x9, offset 0x240 - 0x24f:0x06, + 0x242:0x01, 0x244:0x02, + 0x248:0x03, + 0x25f:0x04, + 0x260:0x01, + 0x26f:0x02, + 0x270:0x04, 0x274:0x06, } -var testTrie = trie{ testLookup[:], testValues[:]} -` +var testTrie = trie{ testLookup[1216:], testValues[0:], testLookup[:], testValues[:]}` diff --git a/src/pkg/exp/locale/collate/export.go b/src/pkg/exp/locale/collate/export.go index 914f0d8d85..c152296f57 100644 --- a/src/pkg/exp/locale/collate/export.go +++ b/src/pkg/exp/locale/collate/export.go @@ -12,8 +12,11 @@ func Init(data interface{}) *Collator { return nil } t := &table{} + loff, voff := init.FirstBlockOffsets() t.index.index = init.TrieIndex() + t.index.index0 = t.index.index[blockSize*loff:] t.index.values = init.TrieValues() + t.index.values0 = t.index.values[blockSize*voff:] t.expandElem = init.ExpandElems() t.contractTries = init.ContractTries() t.contractElem = init.ContractElems() @@ -24,6 +27,7 @@ func Init(data interface{}) *Collator { type tableInitializer interface { TrieIndex() []uint16 TrieValues() []uint32 + FirstBlockOffsets() (lookup, value uint16) ExpandElems() []uint32 ContractTries() []struct{ l, h, n, i uint8 } ContractElems() []uint32 diff --git a/src/pkg/exp/locale/collate/table_test.go b/src/pkg/exp/locale/collate/table_test.go index 9c57836d4b..cd6d027254 100644 --- a/src/pkg/exp/locale/collate/table_test.go +++ b/src/pkg/exp/locale/collate/table_test.go @@ -45,9 +45,10 @@ func makeTable(in []input) (*collate.Collator, error) { b.Add([]rune(r.str), r.ces) } c, err := b.Build("") - if err == nil { - collate.InitCollator(c) + if c == nil { + return nil, err } + collate.InitCollator(c) return c, err } diff --git a/src/pkg/exp/locale/collate/tables.go b/src/pkg/exp/locale/collate/tables.go index 3f99c92bda..43e310c377 100644 --- a/src/pkg/exp/locale/collate/tables.go +++ b/src/pkg/exp/locale/collate/tables.go @@ -19,7 +19,7 @@ var ( ) var rootTable = table{ - trie{rootLookup[:], rootValues[:]}, + trie{rootLookup[1216:], rootValues[0:], rootLookup[:], rootValues[:]}, rootExpandElem[:], contractTrieSet(rootCTEntries[:]), rootContractElem[:], @@ -7049,94 +7049,94 @@ var rootLookup = [1472]uint16{ // Block 0x1, offset 0x40 // Block 0x2, offset 0x80 // Block 0x3, offset 0xc0 - 0x0c2: 0x01, 0x0c3: 0x02, 0x0c4: 0x03, 0x0c5: 0x04, 0x0c6: 0x05, 0x0c7: 0x06, - 0x0c8: 0x07, 0x0c9: 0x08, 0x0ca: 0x09, 0x0cb: 0x0a, 0x0cc: 0x0b, 0x0cd: 0x0c, 0x0ce: 0x0d, 0x0cf: 0x0e, - 0x0d0: 0x0f, 0x0d1: 0x10, 0x0d2: 0x11, 0x0d3: 0x12, 0x0d4: 0x13, 0x0d5: 0x14, 0x0d6: 0x15, 0x0d7: 0x16, - 0x0d8: 0x17, 0x0d9: 0x18, 0x0da: 0x19, 0x0db: 0x1a, 0x0dc: 0x1b, 0x0dd: 0x1c, 0x0de: 0x1d, 0x0df: 0x1e, - 0x0e0: 0x02, 0x0e1: 0x03, 0x0e2: 0x04, 0x0e3: 0x05, 0x0e4: 0x06, - 0x0ea: 0x07, 0x0ed: 0x08, 0x0ef: 0x09, - 0x0f0: 0x12, 0x0f3: 0x14, + 0x0e0: 0x1f, 0x0e1: 0x20, 0x0e4: 0x21, 0x0e5: 0x22, 0x0e6: 0x23, 0x0e7: 0x24, + 0x0e8: 0x25, 0x0e9: 0x26, 0x0ea: 0x27, 0x0eb: 0x28, 0x0ec: 0x29, 0x0ed: 0x2a, 0x0ee: 0x2b, 0x0ef: 0x2c, + 0x0f0: 0x2d, 0x0f1: 0x2e, 0x0f2: 0x2f, 0x0f3: 0x30, 0x0f4: 0x31, 0x0f5: 0x32, 0x0f6: 0x33, 0x0f7: 0x34, + 0x0f8: 0x35, 0x0f9: 0x36, 0x0fa: 0x37, 0x0fb: 0x38, 0x0fc: 0x39, 0x0fd: 0x3a, 0x0fe: 0x3b, 0x0ff: 0x3c, // Block 0x4, offset 0x100 - 0x120: 0x1f, 0x121: 0x20, 0x124: 0x21, 0x125: 0x22, 0x126: 0x23, 0x127: 0x24, - 0x128: 0x25, 0x129: 0x26, 0x12a: 0x27, 0x12b: 0x28, 0x12c: 0x29, 0x12d: 0x2a, 0x12e: 0x2b, 0x12f: 0x2c, - 0x130: 0x2d, 0x131: 0x2e, 0x132: 0x2f, 0x133: 0x30, 0x134: 0x31, 0x135: 0x32, 0x136: 0x33, 0x137: 0x34, - 0x138: 0x35, 0x139: 0x36, 0x13a: 0x37, 0x13b: 0x38, 0x13c: 0x39, 0x13d: 0x3a, 0x13e: 0x3b, 0x13f: 0x3c, + 0x100: 0x3d, 0x101: 0x3e, 0x102: 0x3f, 0x103: 0x40, 0x104: 0x41, 0x105: 0x42, 0x106: 0x43, 0x107: 0x44, + 0x108: 0x45, 0x109: 0x46, 0x10a: 0x47, 0x10b: 0x48, 0x10c: 0x49, 0x10d: 0x4a, 0x10e: 0x4b, 0x10f: 0x4c, + 0x110: 0x4d, 0x111: 0x4e, 0x112: 0x4f, 0x113: 0x50, 0x114: 0x51, 0x115: 0x52, 0x116: 0x53, 0x117: 0x54, + 0x118: 0x55, 0x119: 0x56, 0x11a: 0x57, 0x11b: 0x58, 0x11c: 0x59, 0x11d: 0x5a, 0x11e: 0x5b, 0x11f: 0x5c, + 0x120: 0x5d, 0x121: 0x5e, 0x122: 0x5f, 0x123: 0x60, 0x124: 0x61, 0x125: 0x62, 0x126: 0x63, 0x127: 0x64, + 0x128: 0x65, 0x129: 0x66, 0x12a: 0x67, 0x12c: 0x68, 0x12d: 0x69, 0x12e: 0x6a, 0x12f: 0x6b, + 0x130: 0x6c, 0x131: 0x6d, 0x133: 0x6e, 0x134: 0x6f, 0x135: 0x70, 0x136: 0x71, 0x137: 0x72, + 0x13a: 0x73, 0x13b: 0x74, 0x13e: 0x75, 0x13f: 0x76, // Block 0x5, offset 0x140 - 0x140: 0x3d, 0x141: 0x3e, 0x142: 0x3f, 0x143: 0x40, 0x144: 0x41, 0x145: 0x42, 0x146: 0x43, 0x147: 0x44, - 0x148: 0x45, 0x149: 0x46, 0x14a: 0x47, 0x14b: 0x48, 0x14c: 0x49, 0x14d: 0x4a, 0x14e: 0x4b, 0x14f: 0x4c, - 0x150: 0x4d, 0x151: 0x4e, 0x152: 0x4f, 0x153: 0x50, 0x154: 0x51, 0x155: 0x52, 0x156: 0x53, 0x157: 0x54, - 0x158: 0x55, 0x159: 0x56, 0x15a: 0x57, 0x15b: 0x58, 0x15c: 0x59, 0x15d: 0x5a, 0x15e: 0x5b, 0x15f: 0x5c, - 0x160: 0x5d, 0x161: 0x5e, 0x162: 0x5f, 0x163: 0x60, 0x164: 0x61, 0x165: 0x62, 0x166: 0x63, 0x167: 0x64, - 0x168: 0x65, 0x169: 0x66, 0x16a: 0x67, 0x16c: 0x68, 0x16d: 0x69, 0x16e: 0x6a, 0x16f: 0x6b, - 0x170: 0x6c, 0x171: 0x6d, 0x173: 0x6e, 0x174: 0x6f, 0x175: 0x70, 0x176: 0x71, 0x177: 0x72, - 0x17a: 0x73, 0x17b: 0x74, 0x17e: 0x75, 0x17f: 0x76, + 0x140: 0x77, 0x141: 0x78, 0x142: 0x79, 0x143: 0x7a, 0x144: 0x7b, 0x145: 0x7c, 0x146: 0x7d, 0x147: 0x7e, + 0x148: 0x7f, 0x149: 0x80, 0x14a: 0x81, 0x14b: 0x82, 0x14c: 0x83, 0x14d: 0x84, 0x14e: 0x85, 0x14f: 0x86, + 0x150: 0x87, 0x151: 0x88, 0x152: 0x89, 0x153: 0x8a, 0x154: 0x8b, 0x155: 0x8c, 0x156: 0x8d, 0x157: 0x8e, + 0x158: 0x8f, 0x159: 0x90, 0x15a: 0x91, 0x15b: 0x92, 0x15c: 0x93, 0x15d: 0x94, 0x15e: 0x95, 0x15f: 0x96, + 0x160: 0x97, 0x161: 0x98, 0x162: 0x99, 0x163: 0x9a, 0x164: 0x9b, 0x165: 0x9c, 0x166: 0x9d, 0x167: 0x9e, + 0x168: 0x9f, 0x169: 0xa0, 0x16a: 0xa1, 0x16b: 0xa2, 0x16c: 0xa3, 0x16d: 0xa4, + 0x170: 0xa5, 0x171: 0xa6, 0x172: 0xa7, 0x173: 0xa8, 0x174: 0xa9, 0x175: 0xaa, 0x176: 0xab, 0x177: 0xac, + 0x178: 0xad, 0x17a: 0xae, 0x17b: 0xaf, 0x17c: 0xb0, 0x17d: 0xb0, 0x17e: 0xb0, 0x17f: 0xb1, // Block 0x6, offset 0x180 - 0x180: 0x77, 0x181: 0x78, 0x182: 0x79, 0x183: 0x7a, 0x184: 0x7b, 0x185: 0x7c, 0x186: 0x7d, 0x187: 0x7e, - 0x188: 0x7f, 0x189: 0x80, 0x18a: 0x81, 0x18b: 0x82, 0x18c: 0x83, 0x18d: 0x84, 0x18e: 0x85, 0x18f: 0x86, - 0x190: 0x87, 0x191: 0x88, 0x192: 0x89, 0x193: 0x8a, 0x194: 0x8b, 0x195: 0x8c, 0x196: 0x8d, 0x197: 0x8e, - 0x198: 0x8f, 0x199: 0x90, 0x19a: 0x91, 0x19b: 0x92, 0x19c: 0x93, 0x19d: 0x94, 0x19e: 0x95, 0x19f: 0x96, - 0x1a0: 0x97, 0x1a1: 0x98, 0x1a2: 0x99, 0x1a3: 0x9a, 0x1a4: 0x9b, 0x1a5: 0x9c, 0x1a6: 0x9d, 0x1a7: 0x9e, - 0x1a8: 0x9f, 0x1a9: 0xa0, 0x1aa: 0xa1, 0x1ab: 0xa2, 0x1ac: 0xa3, 0x1ad: 0xa4, - 0x1b0: 0xa5, 0x1b1: 0xa6, 0x1b2: 0xa7, 0x1b3: 0xa8, 0x1b4: 0xa9, 0x1b5: 0xaa, 0x1b6: 0xab, 0x1b7: 0xac, - 0x1b8: 0xad, 0x1ba: 0xae, 0x1bb: 0xaf, 0x1bc: 0xb0, 0x1bd: 0xb0, 0x1be: 0xb0, 0x1bf: 0xb1, + 0x180: 0xb2, 0x181: 0xb3, 0x182: 0xb4, 0x183: 0xb5, 0x184: 0xb6, 0x185: 0xb0, 0x186: 0xb7, 0x187: 0xb8, + 0x188: 0xb9, 0x189: 0xba, 0x18a: 0xbb, 0x18b: 0xbc, 0x18c: 0xbd, 0x18d: 0xbe, 0x18e: 0xbf, 0x18f: 0xc0, // Block 0x7, offset 0x1c0 - 0x1c0: 0xb2, 0x1c1: 0xb3, 0x1c2: 0xb4, 0x1c3: 0xb5, 0x1c4: 0xb6, 0x1c5: 0xb0, 0x1c6: 0xb7, 0x1c7: 0xb8, - 0x1c8: 0xb9, 0x1c9: 0xba, 0x1ca: 0xbb, 0x1cb: 0xbc, 0x1cc: 0xbd, 0x1cd: 0xbe, 0x1ce: 0xbf, 0x1cf: 0xc0, + 0x1f7: 0xc1, // Block 0x8, offset 0x200 - 0x237: 0xc1, + 0x200: 0xc2, 0x201: 0xc3, 0x202: 0xc4, 0x203: 0xc5, 0x204: 0xc6, 0x205: 0xc7, 0x206: 0xc8, 0x207: 0xc9, + 0x208: 0xca, 0x209: 0xcb, 0x20a: 0xcc, 0x20b: 0xcd, 0x20c: 0xce, 0x20d: 0xcf, 0x20e: 0xd0, 0x20f: 0xd1, + 0x210: 0xd2, 0x211: 0xd3, 0x212: 0xd4, 0x213: 0xd5, 0x214: 0xd6, 0x215: 0xd7, 0x216: 0xd8, 0x217: 0xd9, + 0x218: 0xda, 0x219: 0xdb, 0x21a: 0xdc, 0x21b: 0xdd, 0x21c: 0xde, 0x21d: 0xdf, 0x21e: 0xe0, 0x21f: 0xe1, + 0x220: 0xe2, 0x221: 0xe3, 0x222: 0xe4, 0x223: 0xe5, 0x224: 0xe6, 0x225: 0xe7, 0x226: 0xe8, 0x227: 0xe9, + 0x228: 0xea, 0x229: 0xeb, 0x22a: 0xec, 0x22b: 0xed, 0x22c: 0xee, 0x22f: 0xef, // Block 0x9, offset 0x240 - 0x240: 0xc2, 0x241: 0xc3, 0x242: 0xc4, 0x243: 0xc5, 0x244: 0xc6, 0x245: 0xc7, 0x246: 0xc8, 0x247: 0xc9, - 0x248: 0xca, 0x249: 0xcb, 0x24a: 0xcc, 0x24b: 0xcd, 0x24c: 0xce, 0x24d: 0xcf, 0x24e: 0xd0, 0x24f: 0xd1, - 0x250: 0xd2, 0x251: 0xd3, 0x252: 0xd4, 0x253: 0xd5, 0x254: 0xd6, 0x255: 0xd7, 0x256: 0xd8, 0x257: 0xd9, - 0x258: 0xda, 0x259: 0xdb, 0x25a: 0xdc, 0x25b: 0xdd, 0x25c: 0xde, 0x25d: 0xdf, 0x25e: 0xe0, 0x25f: 0xe1, - 0x260: 0xe2, 0x261: 0xe3, 0x262: 0xe4, 0x263: 0xe5, 0x264: 0xe6, 0x265: 0xe7, 0x266: 0xe8, 0x267: 0xe9, - 0x268: 0xea, 0x269: 0xeb, 0x26a: 0xec, 0x26b: 0xed, 0x26c: 0xee, 0x26f: 0xef, + 0x25e: 0xf0, 0x25f: 0xf1, // Block 0xa, offset 0x280 - 0x29e: 0xf0, 0x29f: 0xf1, + 0x2a8: 0xf2, 0x2ac: 0xf3, 0x2ad: 0xf4, 0x2ae: 0xf5, 0x2af: 0xf6, + 0x2b0: 0xf7, 0x2b1: 0xf8, 0x2b2: 0xf9, 0x2b3: 0xfa, 0x2b4: 0xfb, 0x2b5: 0xfc, 0x2b6: 0xfd, 0x2b7: 0xfe, + 0x2b8: 0xff, 0x2b9: 0x100, 0x2ba: 0x101, 0x2bb: 0x102, 0x2bc: 0x103, 0x2bd: 0x104, 0x2be: 0x105, 0x2bf: 0x106, // Block 0xb, offset 0x2c0 - 0x2e8: 0xf2, 0x2ec: 0xf3, 0x2ed: 0xf4, 0x2ee: 0xf5, 0x2ef: 0xf6, - 0x2f0: 0xf7, 0x2f1: 0xf8, 0x2f2: 0xf9, 0x2f3: 0xfa, 0x2f4: 0xfb, 0x2f5: 0xfc, 0x2f6: 0xfd, 0x2f7: 0xfe, - 0x2f8: 0xff, 0x2f9: 0x100, 0x2fa: 0x101, 0x2fb: 0x102, 0x2fc: 0x103, 0x2fd: 0x104, 0x2fe: 0x105, 0x2ff: 0x106, + 0x2c0: 0x107, 0x2c1: 0x108, 0x2c2: 0x109, 0x2c3: 0x10a, 0x2c4: 0x10b, 0x2c5: 0x10c, 0x2c6: 0x10d, 0x2c7: 0x10e, + 0x2ca: 0x10f, 0x2cb: 0x110, 0x2cc: 0x111, 0x2cd: 0x112, 0x2ce: 0x113, 0x2cf: 0x114, + 0x2d0: 0x115, 0x2d1: 0x116, 0x2d2: 0x117, + 0x2e0: 0x118, 0x2e1: 0x119, 0x2e4: 0x11a, + 0x2e8: 0x11b, 0x2e9: 0x11c, 0x2ec: 0x11d, 0x2ed: 0x11e, + 0x2f0: 0x11f, 0x2f1: 0x120, + 0x2f9: 0x121, // Block 0xc, offset 0x300 - 0x300: 0x107, 0x301: 0x108, 0x302: 0x109, 0x303: 0x10a, 0x304: 0x10b, 0x305: 0x10c, 0x306: 0x10d, 0x307: 0x10e, - 0x30a: 0x10f, 0x30b: 0x110, 0x30c: 0x111, 0x30d: 0x112, 0x30e: 0x113, 0x30f: 0x114, - 0x310: 0x115, 0x311: 0x116, 0x312: 0x117, - 0x320: 0x118, 0x321: 0x119, 0x324: 0x11a, - 0x328: 0x11b, 0x329: 0x11c, 0x32c: 0x11d, 0x32d: 0x11e, - 0x330: 0x11f, 0x331: 0x120, - 0x339: 0x121, + 0x300: 0x122, 0x301: 0x123, 0x302: 0x124, 0x303: 0x125, // Block 0xd, offset 0x340 - 0x340: 0x122, 0x341: 0x123, 0x342: 0x124, 0x343: 0x125, + 0x340: 0x126, 0x341: 0x127, 0x342: 0x128, 0x343: 0x129, 0x344: 0x12a, 0x345: 0x12b, 0x346: 0x12c, 0x347: 0x12d, + 0x348: 0x12e, 0x349: 0x12f, 0x34a: 0x130, 0x34b: 0x131, 0x34c: 0x132, 0x34d: 0x133, + 0x350: 0x134, 0x351: 0x135, // Block 0xe, offset 0x380 - 0x380: 0x126, 0x381: 0x127, 0x382: 0x128, 0x383: 0x129, 0x384: 0x12a, 0x385: 0x12b, 0x386: 0x12c, 0x387: 0x12d, - 0x388: 0x12e, 0x389: 0x12f, 0x38a: 0x130, 0x38b: 0x131, 0x38c: 0x132, 0x38d: 0x133, - 0x390: 0x134, 0x391: 0x135, + 0x380: 0x136, 0x381: 0x137, 0x382: 0x138, 0x383: 0x139, 0x384: 0x13a, 0x385: 0x13b, 0x386: 0x13c, 0x387: 0x13d, + 0x388: 0x13e, 0x389: 0x13f, 0x38a: 0x140, 0x38b: 0x141, 0x38c: 0x142, 0x38d: 0x143, 0x38e: 0x144, 0x38f: 0x145, + 0x390: 0x146, // Block 0xf, offset 0x3c0 - 0x3c0: 0x136, 0x3c1: 0x137, 0x3c2: 0x138, 0x3c3: 0x139, 0x3c4: 0x13a, 0x3c5: 0x13b, 0x3c6: 0x13c, 0x3c7: 0x13d, - 0x3c8: 0x13e, 0x3c9: 0x13f, 0x3ca: 0x140, 0x3cb: 0x141, 0x3cc: 0x142, 0x3cd: 0x143, 0x3ce: 0x144, 0x3cf: 0x145, - 0x3d0: 0x146, + 0x3e0: 0x147, 0x3e1: 0x148, 0x3e2: 0x149, 0x3e3: 0x14a, 0x3e4: 0x14b, 0x3e5: 0x14c, 0x3e6: 0x14d, 0x3e7: 0x14e, + 0x3e8: 0x14f, // Block 0x10, offset 0x400 - 0x420: 0x147, 0x421: 0x148, 0x422: 0x149, 0x423: 0x14a, 0x424: 0x14b, 0x425: 0x14c, 0x426: 0x14d, 0x427: 0x14e, - 0x428: 0x14f, + 0x400: 0x150, // Block 0x11, offset 0x440 - 0x440: 0x150, + 0x440: 0x151, 0x441: 0x152, 0x442: 0x153, 0x443: 0x154, 0x444: 0x155, 0x445: 0x156, 0x446: 0x157, 0x447: 0x158, + 0x448: 0x159, 0x449: 0x15a, 0x44c: 0x15b, 0x44d: 0x15c, + 0x450: 0x15d, 0x451: 0x15e, 0x452: 0x15f, 0x453: 0x160, 0x454: 0x161, 0x455: 0x162, 0x456: 0x163, 0x457: 0x164, + 0x458: 0x165, 0x459: 0x166, 0x45a: 0x167, 0x45b: 0x168, 0x45c: 0x169, 0x45d: 0x16a, 0x45e: 0x16b, 0x45f: 0x16c, // Block 0x12, offset 0x480 - 0x480: 0x151, 0x481: 0x152, 0x482: 0x153, 0x483: 0x154, 0x484: 0x155, 0x485: 0x156, 0x486: 0x157, 0x487: 0x158, - 0x488: 0x159, 0x489: 0x15a, 0x48c: 0x15b, 0x48d: 0x15c, - 0x490: 0x15d, 0x491: 0x15e, 0x492: 0x15f, 0x493: 0x160, 0x494: 0x161, 0x495: 0x162, 0x496: 0x163, 0x497: 0x164, - 0x498: 0x165, 0x499: 0x166, 0x49a: 0x167, 0x49b: 0x168, 0x49c: 0x169, 0x49d: 0x16a, 0x49e: 0x16b, 0x49f: 0x16c, + 0x480: 0x16d, 0x481: 0x16e, 0x482: 0x16f, 0x483: 0x170, 0x484: 0x171, 0x485: 0x172, 0x486: 0x173, 0x487: 0x174, + 0x488: 0x175, 0x489: 0x176, 0x48c: 0x177, 0x48d: 0x178, 0x48e: 0x179, 0x48f: 0x17a, + 0x490: 0x17b, 0x491: 0x17c, 0x492: 0x17d, 0x493: 0x17e, 0x494: 0x17f, 0x495: 0x180, 0x497: 0x181, + 0x498: 0x182, 0x499: 0x183, 0x49a: 0x184, 0x49b: 0x185, 0x49c: 0x186, 0x49d: 0x187, // Block 0x13, offset 0x4c0 - 0x4c0: 0x16d, 0x4c1: 0x16e, 0x4c2: 0x16f, 0x4c3: 0x170, 0x4c4: 0x171, 0x4c5: 0x172, 0x4c6: 0x173, 0x4c7: 0x174, - 0x4c8: 0x175, 0x4c9: 0x176, 0x4cc: 0x177, 0x4cd: 0x178, 0x4ce: 0x179, 0x4cf: 0x17a, - 0x4d0: 0x17b, 0x4d1: 0x17c, 0x4d2: 0x17d, 0x4d3: 0x17e, 0x4d4: 0x17f, 0x4d5: 0x180, 0x4d7: 0x181, - 0x4d8: 0x182, 0x4d9: 0x183, 0x4da: 0x184, 0x4db: 0x185, 0x4dc: 0x186, 0x4dd: 0x187, + 0x4d0: 0x09, 0x4d1: 0x0a, 0x4d2: 0x0b, 0x4d3: 0x0c, 0x4d6: 0x0d, + 0x4db: 0x0e, 0x4dd: 0x0f, 0x4df: 0x10, // Block 0x14, offset 0x500 - 0x510: 0x0a, 0x511: 0x0b, 0x512: 0x0c, 0x513: 0x0d, 0x516: 0x0e, - 0x51b: 0x0f, 0x51d: 0x10, 0x51f: 0x11, + 0x500: 0x188, 0x501: 0x189, 0x504: 0x189, 0x505: 0x189, 0x506: 0x189, 0x507: 0x18a, // Block 0x15, offset 0x540 - 0x540: 0x188, 0x541: 0x189, 0x544: 0x189, 0x545: 0x189, 0x546: 0x189, 0x547: 0x18a, + 0x560: 0x12, // Block 0x16, offset 0x580 - 0x5a0: 0x13, + 0x582: 0x01, 0x583: 0x02, 0x584: 0x03, 0x585: 0x04, 0x586: 0x05, 0x587: 0x06, + 0x588: 0x07, 0x589: 0x08, 0x58a: 0x09, 0x58b: 0x0a, 0x58c: 0x0b, 0x58d: 0x0c, 0x58e: 0x0d, 0x58f: 0x0e, + 0x590: 0x0f, 0x591: 0x10, 0x592: 0x11, 0x593: 0x12, 0x594: 0x13, 0x595: 0x14, 0x596: 0x15, 0x597: 0x16, + 0x598: 0x17, 0x599: 0x18, 0x59a: 0x19, 0x59b: 0x1a, 0x59c: 0x1b, 0x59d: 0x1c, 0x59e: 0x1d, 0x59f: 0x1e, + 0x5a0: 0x01, 0x5a1: 0x02, 0x5a2: 0x03, 0x5a3: 0x04, 0x5a4: 0x05, + 0x5aa: 0x06, 0x5ad: 0x07, 0x5af: 0x08, + 0x5b0: 0x11, 0x5b3: 0x13, } // rootCTEntries: 126 entries, 504 bytes @@ -7269,4 +7269,4 @@ var rootCTEntries = [126]struct{ l, h, n, i uint8 }{ {0xB5, 0xB5, 0, 1}, } -// Total size of rootTable is 126924 bytes +// Total size of rootTable is 126932 bytes diff --git a/src/pkg/exp/locale/collate/trie.go b/src/pkg/exp/locale/collate/trie.go index 00fe91bf03..c70a89b089 100644 --- a/src/pkg/exp/locale/collate/trie.go +++ b/src/pkg/exp/locale/collate/trie.go @@ -14,8 +14,10 @@ package collate const blockSize = 64 type trie struct { - index []uint16 - values []uint32 + index0 []uint16 // index for first byte (0xC0-0xFF) + values0 []uint32 // index for first byte (0x00-0x7F) + index []uint16 + values []uint32 } const ( @@ -40,14 +42,14 @@ func (t *trie) lookup(s []byte) (v colElem, sz int) { c0 := s[0] switch { case c0 < tx: - return colElem(t.values[c0]), 1 + return colElem(t.values0[c0]), 1 case c0 < t2: return 0, 1 case c0 < t3: if len(s) < 2 { return 0, 0 } - i := t.index[c0] + i := t.index0[c0] c1 := s[1] if c1 < tx || t2 <= c1 { return 0, 1 @@ -57,7 +59,7 @@ func (t *trie) lookup(s []byte) (v colElem, sz int) { if len(s) < 3 { return 0, 0 } - i := t.index[c0] + i := t.index0[c0] c1 := s[1] if c1 < tx || t2 <= c1 { return 0, 1 @@ -73,7 +75,7 @@ func (t *trie) lookup(s []byte) (v colElem, sz int) { if len(s) < 4 { return 0, 0 } - i := t.index[c0] + i := t.index0[c0] c1 := s[1] if c1 < tx || t2 <= c1 { return 0, 1 diff --git a/src/pkg/exp/locale/collate/trie_test.go b/src/pkg/exp/locale/collate/trie_test.go index 891a5e3a31..00d636c6f8 100644 --- a/src/pkg/exp/locale/collate/trie_test.go +++ b/src/pkg/exp/locale/collate/trie_test.go @@ -89,18 +89,18 @@ var testValues = [832]uint32{ } var testLookup = [640]uint16{ - 0x0c2: 0x01, 0x0c4: 0x02, - 0x0c8: 0x03, - 0x0df: 0x04, - 0x0e0: 0x02, - 0x0ef: 0x03, - 0x0f0: 0x05, 0x0f4: 0x07, - 0x120: 0x05, 0x126: 0x06, - 0x17f: 0x07, - 0x180: 0x08, 0x184: 0x09, - 0x1d0: 0x04, - 0x23f: 0x0a, - 0x24f: 0x06, + 0x0e0: 0x05, 0x0e6: 0x06, + 0x13f: 0x07, + 0x140: 0x08, 0x144: 0x09, + 0x190: 0x03, + 0x1ff: 0x0a, + 0x20f: 0x05, + 0x242: 0x01, 0x244: 0x02, + 0x248: 0x03, + 0x25f: 0x04, + 0x260: 0x01, + 0x26f: 0x02, + 0x270: 0x04, 0x274: 0x06, } -var testTrie = trie{testLookup[:], testValues[:]} +var testTrie = trie{testLookup[6*blockSize:], testValues[:], testLookup[:], testValues[:]}