From: Marcel van Lohuizen Date: Wed, 2 May 2012 15:01:41 +0000 (+0200) Subject: exp/locale/collate: fixed two bugs uncovered by regression tests. X-Git-Tag: go1.1rc2~3263 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=10838165d8249175020155751469bc7729d78fb9;p=gostls13.git exp/locale/collate: fixed two bugs uncovered by regression tests. The first bug was that tertiary ignorables had the same colElem as implicit colElems, yielding unexpected results. The current encoding ensures that a non-implicit colElem is never 0. This fix uncovered another bug of the trie that indexed incorrectly into the null block. This was caused by an unfinished optimization that would avoid the need to max out the most-significant bits of continuation bytes. This bug was also present in the trie used in exp/norm and has been fixed there as well. The appearence of the bug was rare, as the lower blocks happened to be nearly nil. R=r CC=golang-dev https://golang.org/cl/6127070 --- diff --git a/src/pkg/exp/locale/collate/build/colelem.go b/src/pkg/exp/locale/collate/build/colelem.go index 09425320fd..3d5e27c67d 100644 --- a/src/pkg/exp/locale/collate/build/colelem.go +++ b/src/pkg/exp/locale/collate/build/colelem.go @@ -25,11 +25,11 @@ const ( // For normal collation elements, we assume that a collation element either has // a primary or non-default secondary value, not both. // Collation elements with a primary value are of the form -// 010ppppp pppppppp pppppppp tttttttt, where +// 000ppppp pppppppp pppppppp tttttttt, where // - p* is primary collation value // - t* is the tertiary collation value // Collation elements with a secondary value are of the form -// 00000000 ssssssss ssssssss tttttttt, where +// 01000000 ssssssss ssssssss tttttttt, where // - s* is the secondary collation value // - t* is the tertiary collation value const ( @@ -37,7 +37,7 @@ const ( maxSecondaryBits = 16 maxTertiaryBits = 8 - isPrimary = 0x40000000 + isSecondary = 0x40000000 ) func makeCE(weights []int) (uint32, error) { @@ -57,10 +57,10 @@ func makeCE(weights []int) (uint32, error) { return 0, fmt.Errorf("makeCE: non-default secondary weight for non-zero primary: %X", weights) } ce = uint32(weights[0]<= minUnified && r <= maxUnified { // The most common case for CJK. return int(r) + commonUnifiedOffset diff --git a/src/pkg/exp/locale/collate/build/colelem_test.go b/src/pkg/exp/locale/collate/build/colelem_test.go index 841ac11629..0e4f3f14e8 100644 --- a/src/pkg/exp/locale/collate/build/colelem_test.go +++ b/src/pkg/exp/locale/collate/build/colelem_test.go @@ -29,9 +29,9 @@ func decompCE(in []int) (ce uint32, err error) { } var ceTests = []ceTest{ - {normalCE, []int{0, 0, 0}, 000}, - {normalCE, []int{0, 30, 3}, 0x1E03}, - {normalCE, []int{100, defaultSecondary, 3}, 0x40006403}, + {normalCE, []int{0, 0, 0}, 0x40000000}, + {normalCE, []int{0, 30, 3}, 0x40001E03}, + {normalCE, []int{100, defaultSecondary, 3}, 0x6403}, {normalCE, []int{100, 0, 3}, 0xFFFF}, // non-ignorable primary with non-default secondary {normalCE, []int{100, 1, 3}, 0xFFFF}, {normalCE, []int{1 << maxPrimaryBits, defaultSecondary, 0}, 0xFFFF}, diff --git a/src/pkg/exp/locale/collate/build/trie.go b/src/pkg/exp/locale/collate/build/trie.go index 894d29305e..480cc58d15 100644 --- a/src/pkg/exp/locale/collate/build/trie.go +++ b/src/pkg/exp/locale/collate/build/trie.go @@ -19,7 +19,10 @@ import ( "reflect" ) -const blockSize = 64 +const ( + blockSize = 64 + blockOffset = 2 // Substract 2 blocks to compensate for the 0x80 added to continuation bytes. +) type trie struct { index []uint16 @@ -102,7 +105,7 @@ func computeOffsets(index *nodeIndex, n *trieNode) int64 { if n.isInternal() { v, ok := index.lookupBlockIdx[h] if !ok { - v = int64(len(index.lookupBlocks)) + v = int64(len(index.lookupBlocks)) - blockOffset index.lookupBlocks = append(index.lookupBlocks, n) index.lookupBlockIdx[h] = v } @@ -110,7 +113,7 @@ func computeOffsets(index *nodeIndex, n *trieNode) int64 { } else { v, ok := index.valueBlockIdx[h] if !ok { - v = int64(len(index.valueBlocks)) + v = int64(len(index.valueBlocks)) - blockOffset index.valueBlocks = append(index.valueBlocks, n) index.valueBlockIdx[h] = v } diff --git a/src/pkg/exp/locale/collate/build/trie_test.go b/src/pkg/exp/locale/collate/build/trie_test.go index c530bb92f9..3ecbd841c5 100644 --- a/src/pkg/exp/locale/collate/build/trie_test.go +++ b/src/pkg/exp/locale/collate/build/trie_test.go @@ -79,24 +79,24 @@ var testLookup = [640]uint16 { // Block 0x1, offset 0x40 // Block 0x2, offset 0x80 // Block 0x3, offset 0xc0 - 0x0c2:0x03, 0x0c4:0x04, - 0x0c8:0x05, - 0x0df:0x06, - 0x0e0:0x04, - 0x0ef:0x05, - 0x0f0:0x07, 0x0f4:0x09, + 0x0c2:0x01, 0x0c4:0x02, + 0x0c8:0x03, + 0x0df:0x04, + 0x0e0:0x02, + 0x0ef:0x03, + 0x0f0:0x05, 0x0f4:0x07, // Block 0x4, offset 0x100 - 0x120:0x07, 0x126:0x08, + 0x120:0x05, 0x126:0x06, // Block 0x5, offset 0x140 - 0x17f:0x09, + 0x17f:0x07, // Block 0x6, offset 0x180 - 0x180:0x0a, 0x184:0x0b, + 0x180:0x08, 0x184:0x09, // Block 0x7, offset 0x1c0 - 0x1d0:0x06, + 0x1d0:0x04, // Block 0x8, offset 0x200 - 0x23f:0x0c, + 0x23f:0x0a, // Block 0x9, offset 0x240 - 0x24f:0x08, + 0x24f:0x06, } var testTrie = trie{ testLookup[:], testValues[:]} diff --git a/src/pkg/exp/locale/collate/colelem.go b/src/pkg/exp/locale/collate/colelem.go index 03cfc678e8..2cd6201737 100644 --- a/src/pkg/exp/locale/collate/colelem.go +++ b/src/pkg/exp/locale/collate/colelem.go @@ -68,17 +68,18 @@ func (ce colElem) ctype() ceType { // For normal collation elements, we assume that a collation element either has // a primary or non-default secondary value, not both. // Collation elements with a primary value are of the form -// 010ppppp pppppppp pppppppp tttttttt, where +// 000ppppp pppppppp pppppppp tttttttt, where // - p* is primary collation value // - t* is the tertiary collation value // Collation elements with a secondary value are of the form -// 00000000 ssssssss ssssssss tttttttt, where +// 01000000 ssssssss ssssssss tttttttt, where // - s* is the secondary collation value // - t* is the tertiary collation value func splitCE(ce colElem) weights { + const secondaryMask = 0x40000000 w := weights{} w.tertiary = uint8(ce) - if ce&0x40000000 != 0 { + if ce&secondaryMask == 0 { // primary weight form w.primary = uint32((ce >> 8) & 0x1FFFFF) w.secondary = defaultSecondary diff --git a/src/pkg/exp/locale/collate/colelem_test.go b/src/pkg/exp/locale/collate/colelem_test.go index b201f81457..dfc6bd9518 100644 --- a/src/pkg/exp/locale/collate/colelem_test.go +++ b/src/pkg/exp/locale/collate/colelem_test.go @@ -20,14 +20,14 @@ func makeCE(weights []int) colElem { maxPrimaryBits = 21 maxSecondaryBits = 16 maxTertiaryBits = 8 - isPrimary = 0x40000000 + isSecondary = 0x40000000 ) var ce colElem if weights[0] != 0 { ce = colElem(weights[0]< maxSparseEntries { - v = len(index.valueBlocks) + v = len(index.valueBlocks) - blockOffset index.valueBlocks = append(index.valueBlocks, n) index.valueBlockIdx[h] = v } else { @@ -295,7 +298,7 @@ func (t *trieNode) printTables(name string) int { } fmt.Print("\n}\n\n") - cutoff := len(index.valueBlocks) + cutoff := len(index.valueBlocks) - blockOffset ni := len(index.lookupBlocks) * blockSize fmt.Printf("// %sLookup: %d bytes\n", name, ni) fmt.Printf("// Block 0 is the null block.\n")