]> Cypherpunks repositories - gostls13.git/commitdiff
fix ", First" ", Last" ranges from UnicodeData.txt
authorRob Pike <r@golang.org>
Fri, 28 Aug 2009 18:57:38 +0000 (11:57 -0700)
committerRob Pike <r@golang.org>
Fri, 28 Aug 2009 18:57:38 +0000 (11:57 -0700)
R=rsc
DELTA=54  (38 added, 2 deleted, 14 changed)
OCL=34032
CL=34040

src/pkg/unicode/maketables.go
src/pkg/unicode/tables.go

index d1e599c402a66b8ce7f53e71e9234fffa58cd6f3..b5ae37324cfe7d301b73c461dd1174d735f73b2c 100644 (file)
@@ -115,7 +115,18 @@ var scripts = make(map[string] []Script)
 
 var lastChar uint32 = 0;
 
-func parseCategory(line string) {
+// In UnicodeData.txt, some ranges are marked like this:
+// 3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
+// 4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
+// parseCategory returns a state variable indicating the weirdness.
+type State int
+const (
+       SNormal State = iota;   // known to be zero for the type
+       SFirst;
+       SLast;
+)
+
+func parseCategory(line string) (state State) {
        field := strings.Split(line, ";", -1);
        if len(field) != NumField {
                die.Logf("%5s: %d fields (expected %d)\n", line, len(field), NumField);
@@ -155,6 +166,13 @@ func parseCategory(line string) {
        case "Lm", "Lo":
                char.letter(field[FSimpleUppercaseMapping], field[FSimpleLowercaseMapping], field[FSimpleTitlecaseMapping]);
        }
+       switch {
+       case strings.Index(field[FName], ", First>") > 0:
+               state = SFirst
+       case strings.Index(field[FName], ", Last>") > 0:
+               state = SLast
+       }
+       return
 }
 
 func (char *Char) dump(s string) {
@@ -239,6 +257,7 @@ func printCategories() {
                die.Log("bad GET status for UnicodeData.txt", resp.Status);
        }
        input := bufio.NewReader(resp.Body);
+       var first uint32 = 0;
        for {
                line, err := input.ReadString('\n');
                if err != nil {
@@ -247,7 +266,26 @@ func printCategories() {
                        }
                        die.Log(err);
                }
-               parseCategory(line[0:len(line)-1]);
+               switch parseCategory(line[0:len(line)-1]) {
+               case SNormal:
+                       if first != 0 {
+                               die.Logf("bad state normal at U+%04X", lastChar)
+                       }
+               case SFirst:
+                       if first != 0 {
+                               die.Logf("bad state first at U+%04X", lastChar)
+                       }
+                       first = lastChar
+               case SLast:
+                       if first == 0 {
+                               die.Logf("bad state last at U+%04X", lastChar)
+                       }
+                       for i := first+1; i <= lastChar; i++ {
+                               chars[i] = chars[first];
+                               chars[i].codePoint = i;
+                       }
+                       first = 0
+               }
        }
        resp.Body.Close();
        // Find out which categories to dump
index a795799a620008ef7db2956b30623128ba6cb66a..e6fed36a47a2603606ce99e02125b17fe44683f2 100644 (file)
@@ -713,8 +713,8 @@ var letter = []Range {
        Range{0x3131, 0x318e, 1},
        Range{0x31a0, 0x31b7, 1},
        Range{0x31f0, 0x31ff, 1},
-       Range{0x3400, 0x4db5, 6581},
-       Range{0x4e00, 0x9fc3, 20931},
+       Range{0x3400, 0x4db5, 1},
+       Range{0x4e00, 0x9fc3, 1},
        Range{0xa000, 0xa48c, 1},
        Range{0xa500, 0xa60c, 1},
        Range{0xa610, 0xa61f, 1},
@@ -736,7 +736,7 @@ var letter = []Range {
        Range{0xaa00, 0xaa28, 1},
        Range{0xaa40, 0xaa42, 1},
        Range{0xaa44, 0xaa4b, 1},
-       Range{0xac00, 0xd7a3, 11171},
+       Range{0xac00, 0xd7a3, 1},
        Range{0xf900, 0xfa2d, 1},
        Range{0xfa30, 0xfa6a, 1},
        Range{0xfa70, 0xfad9, 1},
@@ -821,7 +821,7 @@ var letter = []Range {
        Range{0x1d78a, 0x1d7a8, 1},
        Range{0x1d7aa, 0x1d7c2, 1},
        Range{0x1d7c4, 0x1d7cb, 1},
-       Range{0x20000, 0x2a6d6, 42710},
+       Range{0x20000, 0x2a6d6, 1},
        Range{0x2f800, 0x2fa1d, 1},
 }
 
@@ -838,15 +838,13 @@ var _Zs = []Range {
 }
 
 var _Cs = []Range {
-       Range{0xd800, 0xdb7f, 895},
-       Range{0xdb80, 0xdbff, 127},
-       Range{0xdc00, 0xdfff, 1023},
+       Range{0xd800, 0xdfff, 1},
 }
 
 var _Co = []Range {
-       Range{0xe000, 0xf8ff, 6399},
-       Range{0xf0000, 0xffffd, 65533},
-       Range{0x100000, 0x10fffd, 65533},
+       Range{0xe000, 0xf8ff, 1},
+       Range{0xf0000, 0xffffd, 1},
+       Range{0x100000, 0x10fffd, 1},
 }
 
 var _Cf = []Range {
@@ -1686,8 +1684,8 @@ var _Lo = []Range {
        Range{0x3131, 0x318e, 1},
        Range{0x31a0, 0x31b7, 1},
        Range{0x31f0, 0x31ff, 1},
-       Range{0x3400, 0x4db5, 6581},
-       Range{0x4e00, 0x9fc3, 20931},
+       Range{0x3400, 0x4db5, 1},
+       Range{0x4e00, 0x9fc3, 1},
        Range{0xa000, 0xa014, 1},
        Range{0xa016, 0xa48c, 1},
        Range{0xa500, 0xa60b, 1},
@@ -1705,7 +1703,7 @@ var _Lo = []Range {
        Range{0xaa00, 0xaa28, 1},
        Range{0xaa40, 0xaa42, 1},
        Range{0xaa44, 0xaa4b, 1},
-       Range{0xac00, 0xd7a3, 11171},
+       Range{0xac00, 0xd7a3, 1},
        Range{0xf900, 0xfa2d, 1},
        Range{0xfa30, 0xfa6a, 1},
        Range{0xfa70, 0xfad9, 1},
@@ -1758,7 +1756,7 @@ var _Lo = []Range {
        Range{0x10a15, 0x10a17, 1},
        Range{0x10a19, 0x10a33, 1},
        Range{0x12000, 0x1236e, 1},
-       Range{0x20000, 0x2a6d6, 42710},
+       Range{0x20000, 0x2a6d6, 1},
        Range{0x2f800, 0x2fa1d, 1},
 }