From: Rob Pike Date: Fri, 28 Aug 2009 18:57:38 +0000 (-0700) Subject: fix ", First" ", Last" ranges from UnicodeData.txt X-Git-Tag: weekly.2009-11-06~728 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=f59ae064ba5c4766ade933685bed983665052ab2;p=gostls13.git fix ", First" ", Last" ranges from UnicodeData.txt R=rsc DELTA=54 (38 added, 2 deleted, 14 changed) OCL=34032 CL=34040 --- diff --git a/src/pkg/unicode/maketables.go b/src/pkg/unicode/maketables.go index d1e599c402..b5ae37324c 100644 --- a/src/pkg/unicode/maketables.go +++ b/src/pkg/unicode/maketables.go @@ -115,7 +115,18 @@ var scripts = make(map[string] []Script) var lastChar uint32 = 0; -func parseCategory(line string) { +// In UnicodeData.txt, some ranges are marked like this: +// 3400;;Lo;0;L;;;;;N;;;;; +// 4DB5;;Lo;0;L;;;;;N;;;;; +// parseCategory returns a state variable indicating the weirdness. +type State int +const ( + SNormal State = iota; // known to be zero for the type + SFirst; + SLast; +) + +func parseCategory(line string) (state State) { field := strings.Split(line, ";", -1); if len(field) != NumField { die.Logf("%5s: %d fields (expected %d)\n", line, len(field), NumField); @@ -155,6 +166,13 @@ func parseCategory(line string) { case "Lm", "Lo": char.letter(field[FSimpleUppercaseMapping], field[FSimpleLowercaseMapping], field[FSimpleTitlecaseMapping]); } + switch { + case strings.Index(field[FName], ", First>") > 0: + state = SFirst + case strings.Index(field[FName], ", Last>") > 0: + state = SLast + } + return } func (char *Char) dump(s string) { @@ -239,6 +257,7 @@ func printCategories() { die.Log("bad GET status for UnicodeData.txt", resp.Status); } input := bufio.NewReader(resp.Body); + var first uint32 = 0; for { line, err := input.ReadString('\n'); if err != nil { @@ -247,7 +266,26 @@ func printCategories() { } die.Log(err); } - parseCategory(line[0:len(line)-1]); + switch parseCategory(line[0:len(line)-1]) { + case SNormal: + if first != 0 { + die.Logf("bad state normal at U+%04X", lastChar) + } + case SFirst: + if first != 0 { + die.Logf("bad state first at U+%04X", lastChar) + } + first = lastChar + case SLast: + if first == 0 { + die.Logf("bad state last at U+%04X", lastChar) + } + for i := first+1; i <= lastChar; i++ { + chars[i] = chars[first]; + chars[i].codePoint = i; + } + first = 0 + } } resp.Body.Close(); // Find out which categories to dump diff --git a/src/pkg/unicode/tables.go b/src/pkg/unicode/tables.go index a795799a62..e6fed36a47 100644 --- a/src/pkg/unicode/tables.go +++ b/src/pkg/unicode/tables.go @@ -713,8 +713,8 @@ var letter = []Range { Range{0x3131, 0x318e, 1}, Range{0x31a0, 0x31b7, 1}, Range{0x31f0, 0x31ff, 1}, - Range{0x3400, 0x4db5, 6581}, - Range{0x4e00, 0x9fc3, 20931}, + Range{0x3400, 0x4db5, 1}, + Range{0x4e00, 0x9fc3, 1}, Range{0xa000, 0xa48c, 1}, Range{0xa500, 0xa60c, 1}, Range{0xa610, 0xa61f, 1}, @@ -736,7 +736,7 @@ var letter = []Range { Range{0xaa00, 0xaa28, 1}, Range{0xaa40, 0xaa42, 1}, Range{0xaa44, 0xaa4b, 1}, - Range{0xac00, 0xd7a3, 11171}, + Range{0xac00, 0xd7a3, 1}, Range{0xf900, 0xfa2d, 1}, Range{0xfa30, 0xfa6a, 1}, Range{0xfa70, 0xfad9, 1}, @@ -821,7 +821,7 @@ var letter = []Range { Range{0x1d78a, 0x1d7a8, 1}, Range{0x1d7aa, 0x1d7c2, 1}, Range{0x1d7c4, 0x1d7cb, 1}, - Range{0x20000, 0x2a6d6, 42710}, + Range{0x20000, 0x2a6d6, 1}, Range{0x2f800, 0x2fa1d, 1}, } @@ -838,15 +838,13 @@ var _Zs = []Range { } var _Cs = []Range { - Range{0xd800, 0xdb7f, 895}, - Range{0xdb80, 0xdbff, 127}, - Range{0xdc00, 0xdfff, 1023}, + Range{0xd800, 0xdfff, 1}, } var _Co = []Range { - Range{0xe000, 0xf8ff, 6399}, - Range{0xf0000, 0xffffd, 65533}, - Range{0x100000, 0x10fffd, 65533}, + Range{0xe000, 0xf8ff, 1}, + Range{0xf0000, 0xffffd, 1}, + Range{0x100000, 0x10fffd, 1}, } var _Cf = []Range { @@ -1686,8 +1684,8 @@ var _Lo = []Range { Range{0x3131, 0x318e, 1}, Range{0x31a0, 0x31b7, 1}, Range{0x31f0, 0x31ff, 1}, - Range{0x3400, 0x4db5, 6581}, - Range{0x4e00, 0x9fc3, 20931}, + Range{0x3400, 0x4db5, 1}, + Range{0x4e00, 0x9fc3, 1}, Range{0xa000, 0xa014, 1}, Range{0xa016, 0xa48c, 1}, Range{0xa500, 0xa60b, 1}, @@ -1705,7 +1703,7 @@ var _Lo = []Range { Range{0xaa00, 0xaa28, 1}, Range{0xaa40, 0xaa42, 1}, Range{0xaa44, 0xaa4b, 1}, - Range{0xac00, 0xd7a3, 11171}, + Range{0xac00, 0xd7a3, 1}, Range{0xf900, 0xfa2d, 1}, Range{0xfa30, 0xfa6a, 1}, Range{0xfa70, 0xfad9, 1}, @@ -1758,7 +1756,7 @@ var _Lo = []Range { Range{0x10a15, 0x10a17, 1}, Range{0x10a19, 0x10a33, 1}, Range{0x12000, 0x1236e, 1}, - Range{0x20000, 0x2a6d6, 42710}, + Range{0x20000, 0x2a6d6, 1}, Range{0x2f800, 0x2fa1d, 1}, }