exp/locale/collate/build: moved some of the code to the appropriate file, as

author Marcel van Lohuizen <mpvl@golang.org>

Thu, 6 Sep 2012 04:16:02 +0000 (13:16 +0900)

committer Marcel van Lohuizen <mpvl@golang.org>

Thu, 6 Sep 2012 04:16:02 +0000 (13:16 +0900)
author Marcel van Lohuizen <mpvl@golang.org>
Thu, 6 Sep 2012 04:16:02 +0000 (13:16 +0900)
committer Marcel van Lohuizen <mpvl@golang.org>
Thu, 6 Sep 2012 04:16:02 +0000 (13:16 +0900)
diff --git a/src/pkg/exp/locale/collate/build/builder.go b/src/pkg/exp/locale/collate/build/builder.go

index 3f3829190cc2183c26ac585cd6f819207cd80854..6372a8ef891774b8abe3f9f6d526f11311d04575 100644 (file)
--- a/src/pkg/exp/locale/collate/build/builder.go
+++ b/src/pkg/exp/locale/collate/build/builder.go
@@ -29,49 +29,6 @@ import (
  //   could analyze and detect when using a context makes sense, there is no
  //   need to expose this construct in the API.
  
-// entry is used to keep track of a single entry in the collation element table
-// during building. Examples of entries can be found in the Default Unicode
-// Collation Element Table.
-// See http://www.unicode.org/Public/UCA/6.0.0/allkeys.txt.
-type entry struct {
-       runes []rune
-       elems [][]int // the collation elements for runes
-       str   string  // same as string(runes)
-
-       // prev, next, and level are used to keep track of tailorings.
-       prev, next *entry
-       level      collate.Level // next differs at this level
-
-       decompose bool // can use NFKD decomposition to generate elems
-       exclude   bool // do not include in table
-       logical   logicalAnchor
-
-       expansionIndex    int // used to store index into expansion table
-       contractionHandle ctHandle
-       contractionIndex  int // index into contraction elements
-}
-
-func (e *entry) String() string {
-       return fmt.Sprintf("%X -> %X (ch:%x; ci:%d, ei:%d)",
-               e.runes, e.elems, e.contractionHandle, e.contractionIndex, e.expansionIndex)
-}
-
-func (e *entry) skip() bool {
-       return e.contraction()
-}
-
-func (e *entry) expansion() bool {
-       return !e.decompose && len(e.elems) > 1
-}
-
-func (e *entry) contraction() bool {
-       return len(e.runes) > 1
-}
-
-func (e *entry) contractionStarter() bool {
-       return e.contractionHandle.n != 0
-}
-
  // A Builder builds a root collation table.  The user must specify the
  // collation elements for each entry.  A common use will be to base the weights
  // on those specified in the allkeys* file as provided by the UCA or CLDR.
@@ -231,52 +188,6 @@ func (t *Tailoring) Insert(level collate.Level, str, extend string) error {
         return nil
  }
  
-func (b *Builder) baseColElem(e *entry) uint32 {
-       ce := uint32(0)
-       var err error
-       switch {
-       case e.expansion():
-               ce, err = makeExpandIndex(e.expansionIndex)
-       default:
-               if e.decompose {
-                       log.Fatal("decompose should be handled elsewhere")
-               }
-               ce, err = makeCE(e.elems[0])
-       }
-       if err != nil {
-               b.error(fmt.Errorf("%s: %X -> %X", err, e.runes, e.elems))
-       }
-       return ce
-}
-
-func (b *Builder) colElem(e *entry) uint32 {
-       if e.skip() {
-               log.Fatal("cannot build colElem for entry that should be skipped")
-       }
-       ce := uint32(0)
-       var err error
-       switch {
-       case e.decompose:
-               t1 := e.elems[0][2]
-               t2 := 0
-               if len(e.elems) > 1 {
-                       t2 = e.elems[1][2]
-               }
-               ce, err = makeDecompose(t1, t2)
-       case e.contractionStarter():
-               ce, err = makeContractIndex(e.contractionHandle, e.contractionIndex)
-       default:
-               if len(e.runes) > 1 {
-                       log.Fatal("colElem: contractions are handled in contraction trie")
-               }
-               ce = b.baseColElem(e)
-       }
-       if err != nil {
-               b.error(err)
-       }
-       return ce
-}
-
  func (b *Builder) error(e error) {
         if e != nil {
                 b.err = e
@@ -352,30 +263,6 @@ func reproducibleFromNFKD(e *entry, exp, nfkd [][]int) bool {
         return true
  }
  
-func equalCE(a, b []int) bool {
-       if len(a) != len(b) {
-               return false
-       }
-       for i := 0; i < 3; i++ {
-               if b[i] != a[i] {
-                       return false
-               }
-       }
-       return true
-}
-
-func equalCEArrays(a, b [][]int) bool {
-       if len(a) != len(b) {
-               return false
-       }
-       for i := range a {
-               if !equalCE(a[i], b[i]) {
-                       return false
-               }
-       }
-       return true
-}
-
  func (b *Builder) simplify() {
         // Runes that are a starter of a contraction should not be removed.
         // (To date, there is only Kannada character 0CCA.)
@@ -412,62 +299,6 @@ func (b *Builder) simplify() {
         }
  }
  
-// convertLargeWeights converts collation elements with large 
-// primaries (either double primaries or for illegal runes)
-// to our own representation.
-// A CJK character C is represented in the DUCET as
-//   [.FBxx.0020.0002.C][.BBBB.0000.0000.C]
-// We will rewrite these characters to a single CE.
-// We assume the CJK values start at 0x8000.
-// See http://unicode.org/reports/tr10/#Implicit_Weights
-func convertLargeWeights(elems [][]int) (res [][]int, err error) {
-       const (
-               cjkPrimaryStart   = 0xFB40
-               rarePrimaryStart  = 0xFB80
-               otherPrimaryStart = 0xFBC0
-               illegalPrimary    = 0xFFFE
-               highBitsMask      = 0x3F
-               lowBitsMask       = 0x7FFF
-               lowBitsFlag       = 0x8000
-               shiftBits         = 15
-       )
-       for i := 0; i < len(elems); i++ {
-               ce := elems[i]
-               p := ce[0]
-               if p < cjkPrimaryStart {
-                       continue
-               }
-               if p > 0xFFFF {
-                       return elems, fmt.Errorf("found primary weight %X; should be <= 0xFFFF", p)
-               }
-               if p >= illegalPrimary {
-                       ce[0] = illegalOffset + p - illegalPrimary
-               } else {
-                       if i+1 >= len(elems) {
-                               return elems, fmt.Errorf("second part of double primary weight missing: %v", elems)
-                       }
-                       if elems[i+1][0]&lowBitsFlag == 0 {
-                               return elems, fmt.Errorf("malformed second part of double primary weight: %v", elems)
-                       }
-                       np := ((p & highBitsMask) << shiftBits) + elems[i+1][0]&lowBitsMask
-                       switch {
-                       case p < rarePrimaryStart:
-                               np += commonUnifiedOffset
-                       case p < otherPrimaryStart:
-                               np += rareUnifiedOffset
-                       default:
-                               p += otherOffset
-                       }
-                       ce[0] = np
-                       for j := i + 1; j+1 < len(elems); j++ {
-                               elems[j] = elems[j+1]
-                       }
-                       elems = elems[:len(elems)-1]
-               }
-       }
-       return elems, nil
-}
-
  // appendExpansion converts the given collation sequence to
  // collation elements and adds them to the expansion table.
  // It returns an index to the expansion table.
@@ -586,7 +417,9 @@ func (b *Builder) processContractions() {
                 es[0].contractionHandle = handle
                 // Add collation elements for contractions.
                 for _, e := range es {
-                       t.contractElem = append(t.contractElem, b.baseColElem(e))
+                       ce, err := e.encodeBase()
+                       b.error(err)
+                       t.contractElem = append(t.contractElem, ce)
                 }
         }
  }
@@ -596,7 +429,8 @@ func (b *Builder) buildTrie() {
         o := b.root
         for e := o.front(); e != nil; e, _ = e.nextIndexed() {
                 if !e.skip() {
-                       ce := b.colElem(e)
+                       ce, err := e.encode()
+                       b.error(err)
                         t.insert(e.runes[0], ce)
                 }
         }
diff --git a/src/pkg/exp/locale/collate/build/colelem.go b/src/pkg/exp/locale/collate/build/colelem.go

index 3f0f0c6c289213163765885ffee96e5808f20e82..343aa740a73041f3bf5cf4d6bd8260b85fe288b2 100644 (file)
--- a/src/pkg/exp/locale/collate/build/colelem.go
+++ b/src/pkg/exp/locale/collate/build/colelem.go
@@ -199,6 +199,62 @@ func implicitPrimary(r rune) int {
         return int(r) + otherOffset
  }
  
+// convertLargeWeights converts collation elements with large 
+// primaries (either double primaries or for illegal runes)
+// to our own representation.
+// A CJK character C is represented in the DUCET as
+//   [.FBxx.0020.0002.C][.BBBB.0000.0000.C]
+// We will rewrite these characters to a single CE.
+// We assume the CJK values start at 0x8000.
+// See http://unicode.org/reports/tr10/#Implicit_Weights
+func convertLargeWeights(elems [][]int) (res [][]int, err error) {
+       const (
+               cjkPrimaryStart   = 0xFB40
+               rarePrimaryStart  = 0xFB80
+               otherPrimaryStart = 0xFBC0
+               illegalPrimary    = 0xFFFE
+               highBitsMask      = 0x3F
+               lowBitsMask       = 0x7FFF
+               lowBitsFlag       = 0x8000
+               shiftBits         = 15
+       )
+       for i := 0; i < len(elems); i++ {
+               ce := elems[i]
+               p := ce[0]
+               if p < cjkPrimaryStart {
+                       continue
+               }
+               if p > 0xFFFF {
+                       return elems, fmt.Errorf("found primary weight %X; should be <= 0xFFFF", p)
+               }
+               if p >= illegalPrimary {
+                       ce[0] = illegalOffset + p - illegalPrimary
+               } else {
+                       if i+1 >= len(elems) {
+                               return elems, fmt.Errorf("second part of double primary weight missing: %v", elems)
+                       }
+                       if elems[i+1][0]&lowBitsFlag == 0 {
+                               return elems, fmt.Errorf("malformed second part of double primary weight: %v", elems)
+                       }
+                       np := ((p & highBitsMask) << shiftBits) + elems[i+1][0]&lowBitsMask
+                       switch {
+                       case p < rarePrimaryStart:
+                               np += commonUnifiedOffset
+                       case p < otherPrimaryStart:
+                               np += rareUnifiedOffset
+                       default:
+                               p += otherOffset
+                       }
+                       ce[0] = np
+                       for j := i + 1; j+1 < len(elems); j++ {
+                               elems[j] = elems[j+1]
+                       }
+                       elems = elems[:len(elems)-1]
+               }
+       }
+       return elems, nil
+}
+
  // nextWeight computes the first possible collation weights following elems
  // for the given level.
  func nextWeight(level collate.Level, elems [][]int) [][]int {
@@ -247,3 +303,27 @@ func compareWeights(a, b [][]int) (result int, level collate.Level) {
         }
         return 0, collate.Identity
  }
+
+func equalCE(a, b []int) bool {
+       if len(a) != len(b) {
+               return false
+       }
+       for i := 0; i < 3; i++ {
+               if b[i] != a[i] {
+                       return false
+               }
+       }
+       return true
+}
+
+func equalCEArrays(a, b [][]int) bool {
+       if len(a) != len(b) {
+               return false
+       }
+       for i := range a {
+               if !equalCE(a[i], b[i]) {
+                       return false
+               }
+       }
+       return true
+}
diff --git a/src/pkg/exp/locale/collate/build/order.go b/src/pkg/exp/locale/collate/build/order.go

index 014141c6d6c0f5809575b671955cef58552be0f8..f09881608d9f90ebae08f60267733c62baa25dd9 100644 (file)
--- a/src/pkg/exp/locale/collate/build/order.go
+++ b/src/pkg/exp/locale/collate/build/order.go
@@ -21,7 +21,48 @@ const (
         lastAnchor                = 1
  )
  
-// TODO: move type entry from builder.go to this file.
+// entry is used to keep track of a single entry in the collation element table
+// during building. Examples of entries can be found in the Default Unicode
+// Collation Element Table.
+// See http://www.unicode.org/Public/UCA/6.0.0/allkeys.txt.
+type entry struct {
+       runes []rune
+       elems [][]int // the collation elements for runes
+       str   string  // same as string(runes)
+
+       // prev, next, and level are used to keep track of tailorings.
+       prev, next *entry
+       level      collate.Level // next differs at this level
+
+       decompose bool // can use NFKD decomposition to generate elems
+       exclude   bool // do not include in table
+       logical   logicalAnchor
+
+       expansionIndex    int // used to store index into expansion table
+       contractionHandle ctHandle
+       contractionIndex  int // index into contraction elements
+}
+
+func (e *entry) String() string {
+       return fmt.Sprintf("%X -> %X (ch:%x; ci:%d, ei:%d)",
+               e.runes, e.elems, e.contractionHandle, e.contractionIndex, e.expansionIndex)
+}
+
+func (e *entry) skip() bool {
+       return e.contraction()
+}
+
+func (e *entry) expansion() bool {
+       return !e.decompose && len(e.elems) > 1
+}
+
+func (e *entry) contraction() bool {
+       return len(e.runes) > 1
+}
+
+func (e *entry) contractionStarter() bool {
+       return e.contractionHandle.n != 0
+}
  
  // nextIndexed gets the next entry that needs to be stored in the table.
  // It returns the entry and the collation level at which the next entry differs
@@ -72,6 +113,42 @@ func (e *entry) insertAfter(n *entry) {
         e.next = n
  }
  
+func (e *entry) encodeBase() (ce uint32, err error) {
+       switch {
+       case e.expansion():
+               ce, err = makeExpandIndex(e.expansionIndex)
+       default:
+               if e.decompose {
+                       log.Fatal("decompose should be handled elsewhere")
+               }
+               ce, err = makeCE(e.elems[0])
+       }
+       return
+}
+
+func (e *entry) encode() (ce uint32, err error) {
+       if e.skip() {
+               log.Fatal("cannot build colElem for entry that should be skipped")
+       }
+       switch {
+       case e.decompose:
+               t1 := e.elems[0][2]
+               t2 := 0
+               if len(e.elems) > 1 {
+                       t2 = e.elems[1][2]
+               }
+               ce, err = makeDecompose(t1, t2)
+       case e.contractionStarter():
+               ce, err = makeContractIndex(e.contractionHandle, e.contractionIndex)
+       default:
+               if len(e.runes) > 1 {
+                       log.Fatal("colElem: contractions are handled in contraction trie")
+               }
+               ce, err = e.encodeBase()
+       }
+       return
+}
+
  // entryLess returns true if a sorts before b and false otherwise.
  func entryLess(a, b *entry) bool {
         if res, _ := compareWeights(a.elems, b.elems); res != 0 {
author	Marcel van Lohuizen <mpvl@golang.org>
	Thu, 6 Sep 2012 04:16:02 +0000 (13:16 +0900)
committer	Marcel van Lohuizen <mpvl@golang.org>
	Thu, 6 Sep 2012 04:16:02 +0000 (13:16 +0900)
src/pkg/exp/locale/collate/build/builder.go		patch \| blob \| history
src/pkg/exp/locale/collate/build/colelem.go		patch \| blob \| history
src/pkg/exp/locale/collate/build/order.go		patch \| blob \| history