From: Rob Pike Date: Fri, 28 Aug 2009 00:04:23 +0000 (-0700) Subject: add scripts tables to the unicode package X-Git-Tag: weekly.2009-11-06~739 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=8b6274eb9f54e2c631e6684f7dbd201bb30c1434;p=gostls13.git add scripts tables to the unicode package R=rsc DELTA=1479 (1422 added, 1 deleted, 56 changed) OCL=33993 CL=33997 --- diff --git a/src/pkg/unicode/maketables.go b/src/pkg/unicode/maketables.go index 8e91276bf7..1cdfed6790 100644 --- a/src/pkg/unicode/maketables.go +++ b/src/pkg/unicode/maketables.go @@ -16,24 +16,30 @@ import ( "os"; "strconv"; "strings"; + "regexp"; "unicode"; ) +var dataUrl = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt"); var url = flag.String("url", - "http://www.unicode.org/Public/5.1.0/ucd/UnicodeData.txt", - "URL of Unicode database") -var tables = flag.String("tables", + "http://www.unicode.org/Public/5.1.0/ucd/", + "URL of Unicode database directory") +var tablelist = flag.String("tables", "all", - "comma-separated list of which tables to generate; default is all; can be letter"); + "comma-separated list of which tables to generate; can be letter"); +var scriptlist = flag.String("scripts", + "all", + "comma-separated list of which script tables to generate"); var test = flag.Bool("test", false, "test existing tables; can be used to compare web data with package data"); +var scriptRe *regexp.Regexp var die = log.New(os.Stderr, nil, "", log.Lexit|log.Lshortfile); var category = map[string] bool{ "letter":true } // Nd Lu etc. letter is a special case -// Data has form: +// UnicodeData.txt has form: // 0037;DIGIT SEVEN;Nd;0;EN;;7;7;7;N;;;;; // 007A;LATIN SMALL LETTER Z;Ll;0;L;;;;;N;;;005A;;005A // See http://www.unicode.org/Public/5.1.0/ucd/UCD.html for full explanation @@ -87,11 +93,28 @@ type Char struct { titleCase uint32; } +// Scripts.txt has form: +// A673 ; Cyrillic # Po SLAVONIC ASTERISK +// A67C..A67D ; Cyrillic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +// See http://www.unicode.org/Public/5.1.0/ucd/UCD.html for full explanation + +type Script struct { + lo, hi uint32; // range of code points + script string; +} + +func main() { + flag.Parse(); + printCategories(); + printScripts(); +} + var chars = make([]Char, MaxChar) +var scripts = make(map[string] []Script) var lastChar uint32 = 0; -func parse(line string) { +func parseCategory(line string) { field := strings.Split(line, ";", -1); if len(field) != NumField { die.Logf("%5s: %d fields (expected %d)\n", line, len(field), NumField); @@ -169,6 +192,16 @@ func allCategories() []string { return a; } +func allScripts() []string { + a := make([]string, len(scripts)); + i := 0; + for k := range scripts { + a[i] = k; + i++; + } + return a; +} + // Extract the version number from the URL func version() string { // Break on slashes and look for the first numeric field @@ -190,35 +223,39 @@ func letterOp(code int) bool { return false } -func main() { - flag.Parse(); - - resp, _, err := http.Get(*url); +func printCategories() { + if *tablelist == "" { + return + } + if *dataUrl == "" { + flag.Set("data", *url + "UnicodeData.txt"); + } + resp, _, err := http.Get(*dataUrl); if err != nil { die.Log(err); } if resp.StatusCode != 200 { - die.Log("bad GET status", resp.StatusCode); + die.Log("bad GET status for UnicodeData.txt", resp.StatusCode); } input := bufio.NewReader(resp.Body); for { - line, err := input.ReadString('\n', false); + line, err := input.ReadString('\n'); if err != nil { if err == os.EOF { break; } die.Log(err); } - parse(line); + parseCategory(line[0:len(line)-1]); } resp.Body.Close(); // Find out which categories to dump - list := strings.Split(*tables, ",", 0); - if *tables == "all" { - list = allCategories(); + list := strings.Split(*tablelist, ",", 0); + if *tablelist == "all" { + list = allCategories() } if *test { - fullTest(list); + fullCategoryTest(list); return } fmt.Printf( @@ -226,16 +263,16 @@ func main() { "// maketables --tables=%s --url=%s\n" "// DO NOT EDIT\n\n" "package unicode\n\n", - *tables, + *tablelist, *url ); fmt.Println("// Version is the Unicode edition from which the tables are derived."); fmt.Printf("const Version = %q\n\n", version()); - if *tables == "all" { - fmt.Println("// Tables is the set of Unicode data tables."); - fmt.Println("var Tables = map[string] []Range {"); + if *tablelist == "all" { + fmt.Println("// Categories is the set of Unicode data tables."); + fmt.Println("var Categories = map[string] []Range {"); for k, _ := range category { fmt.Printf("\t%q: %s,\n", k, k); } @@ -284,7 +321,7 @@ func main() { } dumpRange( fmt.Sprintf( - "// %s is the set of Unicode characters in category %s\n" + "// %s is the set of Unicode characters in category %s.\n" "var %s = _%s\n" "var _%s = []Range {\n", name, name, name, name, name @@ -296,10 +333,10 @@ func main() { } type Op func(code int) bool +const format = "\tRange{0x%04x, 0x%04x, %d},\n"; func dumpRange(header string, inCategory Op, trailer string) { fmt.Print(header); - const format = "\tRange{0x%04x, 0x%04x, %d},\n"; next := 0; // one Range for each iteration for { @@ -348,12 +385,12 @@ func dumpRange(header string, inCategory Op, trailer string) { fmt.Print(trailer); } -func fullTest(list []string) { +func fullCategoryTest(list []string) { for _, name := range list { if _, ok := category[name]; !ok { die.Log("unknown category", name); } - r, ok := unicode.Tables[name]; + r, ok := unicode.Categories[name]; if !ok { die.Log("unknown table", name); } @@ -378,3 +415,147 @@ func verifyRange(name string, inCategory Op, table []unicode.Range) { } } } + +func parseScript(line string) { + comment := strings.Index(line, "#"); + if comment >= 0 { + line = line[0:comment] + } + line = strings.TrimSpaceASCII(line); + if len(line) == 0 { + return + } + field := strings.Split(line, ";", -1); + if len(field) != 2 { + die.Logf("%s: %d fields (expected 2)\n", line, len(field)); + } + matches := scriptRe.MatchStrings(line); + if len(matches) != 4 { + die.Logf("%s: %d matches (expected 3)\n", line, len(matches)); + } + lo, err := strconv.Btoui64(matches[1], 16); + if err != nil { + die.Log("%.5s...:", err) + } + hi := lo; + if len(matches[2]) > 2 { // ignore leading .. + hi, err = strconv.Btoui64(matches[2][2:len(matches[2])], 16); + if err != nil { + die.Log("%.5s...:", err) + } + } + name := matches[3]; + s, ok := scripts[name]; + if len(s) == cap(s) { + ns := make([]Script, len(s), len(s)+100); + for i, sc := range s { + ns[i] = sc + } + s = ns; + } + s = s[0:len(s)+1]; + s[len(s)-1] = Script{ uint32(lo), uint32(hi), name }; + scripts[name] = s; +} + +func printScripts() { + var err os.Error; + scriptRe, err = regexp.Compile(`([0-9A-F]+)(\.\.[0-9A-F]+)? +; ([A-Za-z_]+)`); + if err != nil { + die.Log("re error:", err) + } + resp, _, err := http.Get(*url + "Scripts.txt"); + if err != nil { + die.Log(err); + } + if resp.StatusCode != 200 { + die.Log("bad GET status for Scripts.txt", resp.Status); + } + input := bufio.NewReader(resp.Body); + for { + line, err := input.ReadString('\n'); + if err != nil { + if err == os.EOF { + break; + } + die.Log(err); + } + parseScript(line[0:len(line)-1]); + } + resp.Body.Close(); + + // Find out which scripts to dump + list := strings.Split(*scriptlist, ",", 0); + if *scriptlist == "all" { + list = allScripts(); + } + if *test { + fullScriptTest(list); + return; + } + + fmt.Printf( + "// Generated by running\n" + "// maketables --scripts=%s --url=%s\n" + "// DO NOT EDIT\n\n", + *scriptlist, + *url + ); + if *scriptlist == "all" { + fmt.Println("// Scripts is the set of Unicode script tables."); + fmt.Println("var Scripts = map[string] []Range {"); + for k, _ := range scripts { + fmt.Printf("\t%q: %s,\n", k, k); + } + fmt.Printf("}\n\n"); + } + + for _, name := range list { + fmt.Printf( + "// %s is the set of Unicode characters in script %s.\n" + "var %s = _%s\n" + "var _%s = []Range {\n", + name, name, name, name, name + ); + ranges := foldAdjacent(scripts[name]); + for _, s := range ranges { + fmt.Printf(format, s.Lo, s.Hi, s.Stride); + } + fmt.Printf("}\n\n"); + } +} + +// The script tables have a lot of adjacent elements. Fold them together. +func foldAdjacent(r []Script) []unicode.Range { + s := make([]unicode.Range, 0, len(r)); + j := 0; + for i := 0; i < len(r); i++ { + if j>0 && int(r[i].lo) == s[j-1].Hi+1 { + s[j-1].Hi = int(r[i].hi); + } else { + s = s[0:j+1]; + s[j] = unicode.Range{int(r[i].lo), int(r[i].hi), 1}; + j++; + } + } + return s; +} + +func fullScriptTest(list []string) { + for _, name := range list { + if _, ok := scripts[name]; !ok { + die.Log("unknown script", name); + } + r, ok := unicode.Scripts[name]; + if !ok { + die.Log("unknown table", name); + } + for _, script := range scripts[name] { + for r := script.lo; r <= script.hi; r++ { + if !unicode.Is(unicode.Scripts[name], int(r)) { + fmt.Fprintf(os.Stderr, "U+%04X: not in script %s\n", r, name); + } + } + } + } +} diff --git a/src/pkg/unicode/script_test.go b/src/pkg/unicode/script_test.go new file mode 100644 index 0000000000..f0c5fa6b9b --- /dev/null +++ b/src/pkg/unicode/script_test.go @@ -0,0 +1,175 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unicode + +import "testing" + +type T struct { + rune int; + script string; +} + +// Hand-chosen tests from Unicode 5.1.0, mostly to discover when new +// scripts and categories arise. +var inTest = []T { + T{0x06e2, "Arabic"}, + T{0x0567, "Armenian"}, + T{0x1b37, "Balinese"}, + T{0x09c2, "Bengali"}, + T{0x3115, "Bopomofo"}, + T{0x282d, "Braille"}, + T{0x1a1a, "Buginese"}, + T{0x1747, "Buhid"}, + T{0x156d, "Canadian_Aboriginal"}, + T{0x102a9, "Carian"}, + T{0xaa4d, "Cham"}, + T{0x13c2, "Cherokee"}, + T{0x0020, "Common"}, + T{0x1d4a5, "Common"}, + T{0x2cfc, "Coptic"}, + T{0x12420, "Cuneiform"}, + T{0x1080c, "Cypriot"}, + T{0xa663, "Cyrillic"}, + T{0x10430, "Deseret"}, + T{0x094a, "Devanagari"}, + T{0x1271, "Ethiopic"}, + T{0x10fc, "Georgian"}, + T{0x2c40, "Glagolitic"}, + T{0x10347, "Gothic"}, + T{0x03ae, "Greek"}, + T{0x0abf, "Gujarati"}, + T{0x0a24, "Gurmukhi"}, + T{0x3028, "Han"}, + T{0x11b8, "Hangul"}, + T{0x1727, "Hanunoo"}, + T{0x05a0, "Hebrew"}, + T{0x3058, "Hiragana"}, + T{0x20e6, "Inherited"}, + T{0x0cbd, "Kannada"}, + T{0x30a6, "Katakana"}, + T{0xa928, "Kayah_Li"}, + T{0x10a11, "Kharoshthi"}, + T{0x17c6, "Khmer"}, + T{0x0eaa, "Lao"}, + T{0x1d79, "Latin"}, + T{0x1c10, "Lepcha"}, + T{0x1930, "Limbu"}, + T{0x1003c, "Linear_B"}, + T{0x10290, "Lycian"}, + T{0x10930, "Lydian"}, + T{0x0d42, "Malayalam"}, + T{0x1822, "Mongolian"}, + T{0x104c, "Myanmar"}, + T{0x19c3, "New_Tai_Lue"}, + T{0x07f8, "Nko"}, + T{0x169b, "Ogham"}, + T{0x1c6a, "Ol_Chiki"}, + T{0x10310, "Old_Italic"}, + T{0x103c9, "Old_Persian"}, + T{0x0b3e, "Oriya"}, + T{0x10491, "Osmanya"}, + T{0xa860, "Phags_Pa"}, + T{0x10918, "Phoenician"}, + T{0xa949, "Rejang"}, + T{0x16c0, "Runic"}, + T{0xa892, "Saurashtra"}, + T{0x10463, "Shavian"}, + T{0x0dbd, "Sinhala"}, + T{0x1ba3, "Sundanese"}, + T{0xa803, "Syloti_Nagri"}, + T{0x070f, "Syriac"}, + T{0x170f, "Tagalog"}, + T{0x176f, "Tagbanwa"}, + T{0x1972, "Tai_Le"}, + T{0x0bbf, "Tamil"}, + T{0x0c55, "Telugu"}, + T{0x07a7, "Thaana"}, + T{0x0e46, "Thai"}, + T{0x0f36, "Tibetan"}, + T{0x2d55, "Tifinagh"}, + T{0x10388, "Ugaritic"}, + T{0xa60e, "Vai"}, + T{0xa216, "Yi"}, +} + +var outTest = []T { // not really worth being thorough + T{0x20, "Telugu"} +} + +var inCategoryTest = []T { + T{0x0081, "Cc"}, + T{0x17b4, "Cf"}, + T{0xf0000, "Co"}, + T{0xdb80, "Cs"}, + T{0x0236, "Ll"}, + T{0x1d9d, "Lm"}, + T{0x07cf, "Lo"}, + T{0x1f8a, "Lt"}, + T{0x03ff, "Lu"}, + T{0x0bc1, "Mc"}, + T{0x20df, "Me"}, + T{0x07f0, "Mn"}, + T{0x1bb2, "Nd"}, + T{0x10147, "Nl"}, + T{0x2478, "No"}, + T{0xfe33, "Pc"}, + T{0x2011, "Pd"}, + T{0x301e, "Pe"}, + T{0x2e03, "Pf"}, + T{0x2e02, "Pi"}, + T{0x0022, "Po"}, + T{0x2770, "Ps"}, + T{0x00a4, "Sc"}, + T{0xa711, "Sk"}, + T{0x25f9, "Sm"}, + T{0x2108, "So"}, + T{0x2028, "Zl"}, + T{0x2029, "Zp"}, + T{0x202f, "Zs"}, + T{0x04aa, "letter"}, +} + +func TestScripts(t *testing.T) { + for i, test := range inTest { + if !Is(Scripts[test.script], test.rune) { + t.Errorf("IsScript(%#x, %s) = false, want true\n", test.rune, test.script); + } + } + for i, test := range outTest { + if Is(Scripts[test.script], test.rune) { + t.Errorf("IsScript(%#x, %s) = true, want false\n", test.rune, test.script); + } + } + tested := make(map[string] bool); + for k := range Scripts { + tested[k] = true + } + for _, test := range inTest { + tested[test.script] = false, false + } + for k := range tested { + t.Error("not tested:", k) + } +} + + +func TestCategories(t *testing.T) { + for i, test := range inCategoryTest { + if !Is(Categories[test.script], test.rune) { + t.Errorf("IsCategory(%#x, %s) = false, want true\n", test.rune, test.script); + } + } + tested := make(map[string] bool); + for k := range Categories { + tested[k] = true + } + for _, test := range inCategoryTest { + tested[test.script] = false, false + } + for k := range tested { + t.Error("not tested:", k) + } +} + diff --git a/src/pkg/unicode/tables.go b/src/pkg/unicode/tables.go index 61baaa7618..767c88df85 100644 --- a/src/pkg/unicode/tables.go +++ b/src/pkg/unicode/tables.go @@ -1,5 +1,5 @@ // Generated by running -// maketables --tables=all --url=http://www.unicode.org/Public/5.1.0/ucd/UnicodeData.txt +// maketables --tables=all --url=http://www.unicode.org/Public/5.1.0/ucd/ // DO NOT EDIT package unicode @@ -7,8 +7,8 @@ package unicode // Version is the Unicode edition from which the tables are derived. const Version = "5.1.0" -// Tables is the set of Unicode data tables. -var Tables = map[string] []Range { +// Categories is the set of Unicode data tables. +var Categories = map[string] []Range { "Lm": Lm, "Ll": Ll, "Me": Me, @@ -41,7 +41,7 @@ var Tables = map[string] []Range { "Lo": Lo, } -// Lm is the set of Unicode characters in category Lm +// Lm is the set of Unicode characters in category Lm. var Lm = _Lm var _Lm = []Range { Range{0x02b0, 0x02c1, 1}, @@ -79,7 +79,7 @@ var _Lm = []Range { // Lower is the set of Unicode lower case letters. var Lower = Ll -// Ll is the set of Unicode characters in category Ll +// Ll is the set of Unicode characters in category Ll. var Ll = _Ll var _Ll = []Range { Range{0x0061, 0x007a, 1}, @@ -221,7 +221,7 @@ var _Ll = []Range { Range{0x1d7cb, 0x1d7cb, 1}, } -// Me is the set of Unicode characters in category Me +// Me is the set of Unicode characters in category Me. var Me = _Me var _Me = []Range { Range{0x0488, 0x0489, 1}, @@ -231,7 +231,7 @@ var _Me = []Range { Range{0xa670, 0xa672, 1}, } -// Mc is the set of Unicode characters in category Mc +// Mc is the set of Unicode characters in category Mc. var Mc = _Mc var _Mc = []Range { Range{0x0903, 0x093e, 59}, @@ -314,7 +314,7 @@ var _Mc = []Range { Range{0x1d16e, 0x1d172, 1}, } -// Mn is the set of Unicode characters in category Mn +// Mn is the set of Unicode characters in category Mn. var Mn = _Mn var _Mn = []Range { Range{0x0300, 0x036f, 1}, @@ -461,7 +461,7 @@ var _Mn = []Range { Range{0xe0100, 0xe01ef, 1}, } -// Zl is the set of Unicode characters in category Zl +// Zl is the set of Unicode characters in category Zl. var Zl = _Zl var _Zl = []Range { Range{0x2028, 0x2028, 1}, @@ -843,13 +843,13 @@ var letter = []Range { Range{0x20000, 0x2a6d6, 42710}, Range{0x2f800, 0x2fa1d, 1}, } -// Zp is the set of Unicode characters in category Zp +// Zp is the set of Unicode characters in category Zp. var Zp = _Zp var _Zp = []Range { Range{0x2029, 0x2029, 1}, } -// Zs is the set of Unicode characters in category Zs +// Zs is the set of Unicode characters in category Zs. var Zs = _Zs var _Zs = []Range { Range{0x0020, 0x00a0, 128}, @@ -859,7 +859,7 @@ var _Zs = []Range { Range{0x3000, 0x3000, 1}, } -// Cs is the set of Unicode characters in category Cs +// Cs is the set of Unicode characters in category Cs. var Cs = _Cs var _Cs = []Range { Range{0xd800, 0xdb7f, 895}, @@ -867,7 +867,7 @@ var _Cs = []Range { Range{0xdc00, 0xdfff, 1023}, } -// Co is the set of Unicode characters in category Co +// Co is the set of Unicode characters in category Co. var Co = _Co var _Co = []Range { Range{0xe000, 0xf8ff, 6399}, @@ -875,7 +875,7 @@ var _Co = []Range { Range{0x100000, 0x10fffd, 65533}, } -// Cf is the set of Unicode characters in category Cf +// Cf is the set of Unicode characters in category Cf. var Cf = _Cf var _Cf = []Range { Range{0x00ad, 0x0600, 1363}, @@ -893,14 +893,14 @@ var _Cf = []Range { Range{0xe0021, 0xe007f, 1}, } -// Cc is the set of Unicode characters in category Cc +// Cc is the set of Unicode characters in category Cc. var Cc = _Cc var _Cc = []Range { Range{0x0001, 0x001f, 1}, Range{0x007f, 0x009f, 1}, } -// Po is the set of Unicode characters in category Po +// Po is the set of Unicode characters in category Po. var Po = _Po var _Po = []Range { Range{0x0021, 0x0023, 1}, @@ -999,7 +999,7 @@ var _Po = []Range { Range{0x12470, 0x12473, 1}, } -// Pi is the set of Unicode characters in category Pi +// Pi is the set of Unicode characters in category Pi. var Pi = _Pi var _Pi = []Range { Range{0x00ab, 0x2018, 8045}, @@ -1010,7 +1010,7 @@ var _Pi = []Range { Range{0x2e1c, 0x2e20, 4}, } -// Pf is the set of Unicode characters in category Pf +// Pf is the set of Unicode characters in category Pf. var Pf = _Pf var _Pf = []Range { Range{0x00bb, 0x2019, 8030}, @@ -1020,7 +1020,7 @@ var _Pf = []Range { Range{0x2e1d, 0x2e21, 4}, } -// Pe is the set of Unicode characters in category Pe +// Pe is the set of Unicode characters in category Pe. var Pe = _Pe var _Pe = []Range { Range{0x0029, 0x005d, 52}, @@ -1046,7 +1046,7 @@ var _Pe = []Range { Range{0xff5d, 0xff63, 3}, } -// Pd is the set of Unicode characters in category Pd +// Pd is the set of Unicode characters in category Pd. var Pd = _Pd var _Pd = []Range { Range{0x002d, 0x058a, 1373}, @@ -1059,7 +1059,7 @@ var _Pd = []Range { Range{0xfe63, 0xff0d, 170}, } -// Pc is the set of Unicode characters in category Pc +// Pc is the set of Unicode characters in category Pc. var Pc = _Pc var _Pc = []Range { Range{0x005f, 0x203f, 8160}, @@ -1069,7 +1069,7 @@ var _Pc = []Range { Range{0xff3f, 0xff3f, 1}, } -// Ps is the set of Unicode characters in category Ps +// Ps is the set of Unicode characters in category Ps. var Ps = _Ps var _Ps = []Range { Range{0x0028, 0x005b, 51}, @@ -1101,7 +1101,7 @@ var _Ps = []Range { // Digit is the set of Unicode characters with the "decimal digit" property. var Digit = Nd -// Nd is the set of Unicode characters in category Nd +// Nd is the set of Unicode characters in category Nd. var Nd = _Nd var _Nd = []Range { Range{0x0030, 0x0039, 1}, @@ -1139,7 +1139,7 @@ var _Nd = []Range { Range{0x1d7ce, 0x1d7ff, 1}, } -// Nl is the set of Unicode characters in category Nl +// Nl is the set of Unicode characters in category Nl. var Nl = _Nl var _Nl = []Range { Range{0x16ee, 0x16f0, 1}, @@ -1154,7 +1154,7 @@ var _Nl = []Range { Range{0x12400, 0x12462, 1}, } -// No is the set of Unicode characters in category No +// No is the set of Unicode characters in category No. var No = _No var _No = []Range { Range{0x00b2, 0x00b3, 1}, @@ -1189,7 +1189,7 @@ var _No = []Range { Range{0x1d360, 0x1d371, 1}, } -// So is the set of Unicode characters in category So +// So is the set of Unicode characters in category So. var So = _So var _So = []Range { Range{0x00a6, 0x00a7, 1}, @@ -1315,7 +1315,7 @@ var _So = []Range { Range{0x1f030, 0x1f093, 1}, } -// Sm is the set of Unicode characters in category Sm +// Sm is the set of Unicode characters in category Sm. var Sm = _Sm var _Sm = []Range { Range{0x002b, 0x003c, 17}, @@ -1370,7 +1370,7 @@ var _Sm = []Range { Range{0x1d7a9, 0x1d7c3, 26}, } -// Sk is the set of Unicode characters in category Sk +// Sk is the set of Unicode characters in category Sk. var Sk = _Sk var _Sk = []Range { Range{0x005e, 0x0060, 2}, @@ -1396,7 +1396,7 @@ var _Sk = []Range { Range{0xffe3, 0xffe3, 1}, } -// Sc is the set of Unicode characters in category Sc +// Sc is the set of Unicode characters in category Sc. var Sc = _Sc var _Sc = []Range { Range{0x0024, 0x00a2, 126}, @@ -1416,7 +1416,7 @@ var _Sc = []Range { // Upper is the set of Unicode upper case letters. var Upper = Lu -// Lu is the set of Unicode characters in category Lu +// Lu is the set of Unicode characters in category Lu. var Lu = _Lu var _Lu = []Range { Range{0x0041, 0x005a, 1}, @@ -1549,7 +1549,7 @@ var _Lu = []Range { // Title is the set of Unicode title case letters. var Title = Lt -// Lt is the set of Unicode characters in category Lt +// Lt is the set of Unicode characters in category Lt. var Lt = _Lt var _Lt = []Range { Range{0x01c5, 0x01cb, 3}, @@ -1561,7 +1561,7 @@ var _Lt = []Range { Range{0x1ffc, 0x1ffc, 1}, } -// Lo is the set of Unicode characters in category Lo +// Lo is the set of Unicode characters in category Lo. var Lo = _Lo var _Lo = []Range { Range{0x01bb, 0x01c0, 5}, @@ -1838,3 +1838,1068 @@ var _Lo = []Range { Range{0x2f800, 0x2fa1d, 1}, } +// Generated by running +// maketables --scripts=all --url=http://www.unicode.org/Public/5.1.0/ucd/ +// DO NOT EDIT + +// Scripts is the set of Unicode script tables. +var Scripts = map[string] []Range { + "Katakana": Katakana, + "Malayalam": Malayalam, + "Phags_Pa": Phags_Pa, + "Latin": Latin, + "Osmanya": Osmanya, + "Khmer": Khmer, + "Inherited": Inherited, + "Telugu": Telugu, + "Bopomofo": Bopomofo, + "Kayah_Li": Kayah_Li, + "New_Tai_Lue": New_Tai_Lue, + "Tai_Le": Tai_Le, + "Kharoshthi": Kharoshthi, + "Common": Common, + "Kannada": Kannada, + "Tamil": Tamil, + "Tagalog": Tagalog, + "Arabic": Arabic, + "Tagbanwa": Tagbanwa, + "Canadian_Aboriginal": Canadian_Aboriginal, + "Tibetan": Tibetan, + "Coptic": Coptic, + "Hiragana": Hiragana, + "Limbu": Limbu, + "Myanmar": Myanmar, + "Armenian": Armenian, + "Sinhala": Sinhala, + "Bengali": Bengali, + "Greek": Greek, + "Cham": Cham, + "Hebrew": Hebrew, + "Saurashtra": Saurashtra, + "Hangul": Hangul, + "Runic": Runic, + "Deseret": Deseret, + "Sundanese": Sundanese, + "Glagolitic": Glagolitic, + "Oriya": Oriya, + "Buhid": Buhid, + "Ethiopic": Ethiopic, + "Syloti_Nagri": Syloti_Nagri, + "Vai": Vai, + "Cherokee": Cherokee, + "Ogham": Ogham, + "Syriac": Syriac, + "Gurmukhi": Gurmukhi, + "Ol_Chiki": Ol_Chiki, + "Mongolian": Mongolian, + "Hanunoo": Hanunoo, + "Cypriot": Cypriot, + "Buginese": Buginese, + "Lepcha": Lepcha, + "Thaana": Thaana, + "Old_Persian": Old_Persian, + "Cuneiform": Cuneiform, + "Rejang": Rejang, + "Georgian": Georgian, + "Shavian": Shavian, + "Lycian": Lycian, + "Nko": Nko, + "Yi": Yi, + "Lao": Lao, + "Linear_B": Linear_B, + "Old_Italic": Old_Italic, + "Devanagari": Devanagari, + "Lydian": Lydian, + "Tifinagh": Tifinagh, + "Ugaritic": Ugaritic, + "Thai": Thai, + "Cyrillic": Cyrillic, + "Gujarati": Gujarati, + "Carian": Carian, + "Phoenician": Phoenician, + "Balinese": Balinese, + "Braille": Braille, + "Han": Han, + "Gothic": Gothic, +} + +// Katakana is the set of Unicode characters in script Katakana. +var Katakana = _Katakana +var _Katakana = []Range { + Range{0x30a1, 0x30fa, 1}, + Range{0x30fd, 0x30ff, 1}, + Range{0x31f0, 0x31ff, 1}, + Range{0x32d0, 0x32fe, 1}, + Range{0x3300, 0x3357, 1}, + Range{0xff66, 0xff6f, 1}, + Range{0xff71, 0xff9d, 1}, +} + +// Malayalam is the set of Unicode characters in script Malayalam. +var Malayalam = _Malayalam +var _Malayalam = []Range { + Range{0x0d02, 0x0d03, 1}, + Range{0x0d05, 0x0d0c, 1}, + Range{0x0d0e, 0x0d10, 1}, + Range{0x0d12, 0x0d28, 1}, + Range{0x0d2a, 0x0d39, 1}, + Range{0x0d3d, 0x0d44, 1}, + Range{0x0d46, 0x0d48, 1}, + Range{0x0d4a, 0x0d4d, 1}, + Range{0x0d57, 0x0d57, 1}, + Range{0x0d60, 0x0d63, 1}, + Range{0x0d66, 0x0d75, 1}, + Range{0x0d79, 0x0d7f, 1}, +} + +// Phags_Pa is the set of Unicode characters in script Phags_Pa. +var Phags_Pa = _Phags_Pa +var _Phags_Pa = []Range { + Range{0xa840, 0xa877, 1}, +} + +// Latin is the set of Unicode characters in script Latin. +var Latin = _Latin +var _Latin = []Range { + Range{0x0041, 0x005a, 1}, + Range{0x0061, 0x007a, 1}, + Range{0x00aa, 0x00aa, 1}, + Range{0x00ba, 0x00ba, 1}, + Range{0x00c0, 0x00d6, 1}, + Range{0x00d8, 0x00f6, 1}, + Range{0x00f8, 0x02b8, 1}, + Range{0x02e0, 0x02e4, 1}, + Range{0x1d00, 0x1d25, 1}, + Range{0x1d2c, 0x1d5c, 1}, + Range{0x1d62, 0x1d65, 1}, + Range{0x1d6b, 0x1d77, 1}, + Range{0x1d79, 0x1dbe, 1}, + Range{0x1e00, 0x1eff, 1}, + Range{0x2071, 0x2071, 1}, + Range{0x207f, 0x207f, 1}, + Range{0x2090, 0x2094, 1}, + Range{0x212a, 0x212b, 1}, + Range{0x2132, 0x2132, 1}, + Range{0x214e, 0x214e, 1}, + Range{0x2160, 0x2188, 1}, + Range{0x2c60, 0x2c6f, 1}, + Range{0x2c71, 0x2c7d, 1}, + Range{0xa722, 0xa787, 1}, + Range{0xa78b, 0xa78c, 1}, + Range{0xa7fb, 0xa7ff, 1}, + Range{0xfb00, 0xfb06, 1}, + Range{0xff21, 0xff3a, 1}, + Range{0xff41, 0xff5a, 1}, +} + +// Osmanya is the set of Unicode characters in script Osmanya. +var Osmanya = _Osmanya +var _Osmanya = []Range { + Range{0x10480, 0x1049d, 1}, + Range{0x104a0, 0x104a9, 1}, +} + +// Khmer is the set of Unicode characters in script Khmer. +var Khmer = _Khmer +var _Khmer = []Range { + Range{0x1780, 0x17dd, 1}, + Range{0x17e0, 0x17e9, 1}, + Range{0x17f0, 0x17f9, 1}, + Range{0x19e0, 0x19ff, 1}, +} + +// Inherited is the set of Unicode characters in script Inherited. +var Inherited = _Inherited +var _Inherited = []Range { + Range{0x0300, 0x036f, 1}, + Range{0x064b, 0x0655, 1}, + Range{0x0670, 0x0670, 1}, + Range{0x0951, 0x0952, 1}, + Range{0x1dc0, 0x1de6, 1}, + Range{0x1dfe, 0x1dff, 1}, + Range{0x200c, 0x200d, 1}, + Range{0x20d0, 0x20f0, 1}, + Range{0x302a, 0x302f, 1}, + Range{0x3099, 0x309a, 1}, + Range{0xfe00, 0xfe0f, 1}, + Range{0xfe20, 0xfe26, 1}, + Range{0x101fd, 0x101fd, 1}, + Range{0x1d167, 0x1d169, 1}, + Range{0x1d17b, 0x1d182, 1}, + Range{0x1d185, 0x1d18b, 1}, + Range{0x1d1aa, 0x1d1ad, 1}, + Range{0xe0100, 0xe01ef, 1}, +} + +// Telugu is the set of Unicode characters in script Telugu. +var Telugu = _Telugu +var _Telugu = []Range { + Range{0x0c01, 0x0c03, 1}, + Range{0x0c05, 0x0c0c, 1}, + Range{0x0c0e, 0x0c10, 1}, + Range{0x0c12, 0x0c28, 1}, + Range{0x0c2a, 0x0c33, 1}, + Range{0x0c35, 0x0c39, 1}, + Range{0x0c3d, 0x0c44, 1}, + Range{0x0c46, 0x0c48, 1}, + Range{0x0c4a, 0x0c4d, 1}, + Range{0x0c55, 0x0c56, 1}, + Range{0x0c58, 0x0c59, 1}, + Range{0x0c60, 0x0c63, 1}, + Range{0x0c66, 0x0c6f, 1}, + Range{0x0c78, 0x0c7f, 1}, +} + +// Bopomofo is the set of Unicode characters in script Bopomofo. +var Bopomofo = _Bopomofo +var _Bopomofo = []Range { + Range{0x3105, 0x312d, 1}, + Range{0x31a0, 0x31b7, 1}, +} + +// Kayah_Li is the set of Unicode characters in script Kayah_Li. +var Kayah_Li = _Kayah_Li +var _Kayah_Li = []Range { + Range{0xa900, 0xa92f, 1}, +} + +// New_Tai_Lue is the set of Unicode characters in script New_Tai_Lue. +var New_Tai_Lue = _New_Tai_Lue +var _New_Tai_Lue = []Range { + Range{0x1980, 0x19a9, 1}, + Range{0x19b0, 0x19c9, 1}, + Range{0x19d0, 0x19d9, 1}, + Range{0x19de, 0x19df, 1}, +} + +// Tai_Le is the set of Unicode characters in script Tai_Le. +var Tai_Le = _Tai_Le +var _Tai_Le = []Range { + Range{0x1950, 0x196d, 1}, + Range{0x1970, 0x1974, 1}, +} + +// Kharoshthi is the set of Unicode characters in script Kharoshthi. +var Kharoshthi = _Kharoshthi +var _Kharoshthi = []Range { + Range{0x10a00, 0x10a03, 1}, + Range{0x10a05, 0x10a06, 1}, + Range{0x10a0c, 0x10a13, 1}, + Range{0x10a15, 0x10a17, 1}, + Range{0x10a19, 0x10a33, 1}, + Range{0x10a38, 0x10a3a, 1}, + Range{0x10a3f, 0x10a47, 1}, + Range{0x10a50, 0x10a58, 1}, +} + +// Common is the set of Unicode characters in script Common. +var Common = _Common +var _Common = []Range { + Range{0x0000, 0x0040, 1}, + Range{0x005b, 0x0060, 1}, + Range{0x007b, 0x00a9, 1}, + Range{0x00ab, 0x00b9, 1}, + Range{0x00bb, 0x00bf, 1}, + Range{0x00d7, 0x00d7, 1}, + Range{0x00f7, 0x00f7, 1}, + Range{0x02b9, 0x02df, 1}, + Range{0x02e5, 0x02ff, 1}, + Range{0x0374, 0x0374, 1}, + Range{0x037e, 0x037e, 1}, + Range{0x0385, 0x0385, 1}, + Range{0x0387, 0x0387, 1}, + Range{0x0589, 0x0589, 1}, + Range{0x0600, 0x0603, 1}, + Range{0x060c, 0x060c, 1}, + Range{0x061b, 0x061b, 1}, + Range{0x061f, 0x061f, 1}, + Range{0x0640, 0x0640, 1}, + Range{0x0660, 0x0669, 1}, + Range{0x06dd, 0x06dd, 1}, + Range{0x0964, 0x0965, 1}, + Range{0x0970, 0x0970, 1}, + Range{0x0cf1, 0x0cf2, 1}, + Range{0x0e3f, 0x0e3f, 1}, + Range{0x10fb, 0x10fb, 1}, + Range{0x16eb, 0x16ed, 1}, + Range{0x1735, 0x1736, 1}, + Range{0x1802, 0x1803, 1}, + Range{0x1805, 0x1805, 1}, + Range{0x2000, 0x200b, 1}, + Range{0x200e, 0x2064, 1}, + Range{0x206a, 0x2070, 1}, + Range{0x2074, 0x207e, 1}, + Range{0x2080, 0x208e, 1}, + Range{0x20a0, 0x20b5, 1}, + Range{0x2100, 0x2125, 1}, + Range{0x2127, 0x2129, 1}, + Range{0x212c, 0x2131, 1}, + Range{0x2133, 0x214d, 1}, + Range{0x214f, 0x214f, 1}, + Range{0x2153, 0x215f, 1}, + Range{0x2190, 0x23e7, 1}, + Range{0x2400, 0x2426, 1}, + Range{0x2440, 0x244a, 1}, + Range{0x2460, 0x269d, 1}, + Range{0x26a0, 0x26bc, 1}, + Range{0x26c0, 0x26c3, 1}, + Range{0x2701, 0x2704, 1}, + Range{0x2706, 0x2709, 1}, + Range{0x270c, 0x2727, 1}, + Range{0x2729, 0x274b, 1}, + Range{0x274d, 0x274d, 1}, + Range{0x274f, 0x2752, 1}, + Range{0x2756, 0x2756, 1}, + Range{0x2758, 0x275e, 1}, + Range{0x2761, 0x2794, 1}, + Range{0x2798, 0x27af, 1}, + Range{0x27b1, 0x27be, 1}, + Range{0x27c0, 0x27ca, 1}, + Range{0x27cc, 0x27cc, 1}, + Range{0x27d0, 0x27ff, 1}, + Range{0x2900, 0x2b4c, 1}, + Range{0x2b50, 0x2b54, 1}, + Range{0x2e00, 0x2e30, 1}, + Range{0x2ff0, 0x2ffb, 1}, + Range{0x3000, 0x3004, 1}, + Range{0x3006, 0x3006, 1}, + Range{0x3008, 0x3020, 1}, + Range{0x3030, 0x3037, 1}, + Range{0x303c, 0x303f, 1}, + Range{0x309b, 0x309c, 1}, + Range{0x30a0, 0x30a0, 1}, + Range{0x30fb, 0x30fc, 1}, + Range{0x3190, 0x319f, 1}, + Range{0x31c0, 0x31e3, 1}, + Range{0x3220, 0x3243, 1}, + Range{0x3250, 0x325f, 1}, + Range{0x327f, 0x32cf, 1}, + Range{0x3358, 0x33ff, 1}, + Range{0x4dc0, 0x4dff, 1}, + Range{0xa700, 0xa721, 1}, + Range{0xa788, 0xa78a, 1}, + Range{0xfd3e, 0xfd3f, 1}, + Range{0xfdfd, 0xfdfd, 1}, + Range{0xfe10, 0xfe19, 1}, + Range{0xfe30, 0xfe52, 1}, + Range{0xfe54, 0xfe66, 1}, + Range{0xfe68, 0xfe6b, 1}, + Range{0xfeff, 0xfeff, 1}, + Range{0xff01, 0xff20, 1}, + Range{0xff3b, 0xff40, 1}, + Range{0xff5b, 0xff65, 1}, + Range{0xff70, 0xff70, 1}, + Range{0xff9e, 0xff9f, 1}, + Range{0xffe0, 0xffe6, 1}, + Range{0xffe8, 0xffee, 1}, + Range{0xfff9, 0xfffd, 1}, + Range{0x10100, 0x10102, 1}, + Range{0x10107, 0x10133, 1}, + Range{0x10137, 0x1013f, 1}, + Range{0x10190, 0x1019b, 1}, + Range{0x101d0, 0x101fc, 1}, + Range{0x1d000, 0x1d0f5, 1}, + Range{0x1d100, 0x1d126, 1}, + Range{0x1d129, 0x1d166, 1}, + Range{0x1d16a, 0x1d17a, 1}, + Range{0x1d183, 0x1d184, 1}, + Range{0x1d18c, 0x1d1a9, 1}, + Range{0x1d1ae, 0x1d1dd, 1}, + Range{0x1d300, 0x1d356, 1}, + Range{0x1d360, 0x1d371, 1}, + Range{0x1d400, 0x1d454, 1}, + Range{0x1d456, 0x1d49c, 1}, + Range{0x1d49e, 0x1d49f, 1}, + Range{0x1d4a2, 0x1d4a2, 1}, + Range{0x1d4a5, 0x1d4a6, 1}, + Range{0x1d4a9, 0x1d4ac, 1}, + Range{0x1d4ae, 0x1d4b9, 1}, + Range{0x1d4bb, 0x1d4bb, 1}, + Range{0x1d4bd, 0x1d4c3, 1}, + Range{0x1d4c5, 0x1d505, 1}, + Range{0x1d507, 0x1d50a, 1}, + Range{0x1d50d, 0x1d514, 1}, + Range{0x1d516, 0x1d51c, 1}, + Range{0x1d51e, 0x1d539, 1}, + Range{0x1d53b, 0x1d53e, 1}, + Range{0x1d540, 0x1d544, 1}, + Range{0x1d546, 0x1d546, 1}, + Range{0x1d54a, 0x1d550, 1}, + Range{0x1d552, 0x1d6a5, 1}, + Range{0x1d6a8, 0x1d7cb, 1}, + Range{0x1d7ce, 0x1d7ff, 1}, + Range{0x1f000, 0x1f02b, 1}, + Range{0x1f030, 0x1f093, 1}, + Range{0xe0001, 0xe0001, 1}, + Range{0xe0020, 0xe007f, 1}, +} + +// Kannada is the set of Unicode characters in script Kannada. +var Kannada = _Kannada +var _Kannada = []Range { + Range{0x0c82, 0x0c83, 1}, + Range{0x0c85, 0x0c8c, 1}, + Range{0x0c8e, 0x0c90, 1}, + Range{0x0c92, 0x0ca8, 1}, + Range{0x0caa, 0x0cb3, 1}, + Range{0x0cb5, 0x0cb9, 1}, + Range{0x0cbc, 0x0cc4, 1}, + Range{0x0cc6, 0x0cc8, 1}, + Range{0x0cca, 0x0ccd, 1}, + Range{0x0cd5, 0x0cd6, 1}, + Range{0x0cde, 0x0cde, 1}, + Range{0x0ce0, 0x0ce3, 1}, + Range{0x0ce6, 0x0cef, 1}, +} + +// Tamil is the set of Unicode characters in script Tamil. +var Tamil = _Tamil +var _Tamil = []Range { + Range{0x0b82, 0x0b83, 1}, + Range{0x0b85, 0x0b8a, 1}, + Range{0x0b8e, 0x0b90, 1}, + Range{0x0b92, 0x0b95, 1}, + Range{0x0b99, 0x0b9a, 1}, + Range{0x0b9c, 0x0b9c, 1}, + Range{0x0b9e, 0x0b9f, 1}, + Range{0x0ba3, 0x0ba4, 1}, + Range{0x0ba8, 0x0baa, 1}, + Range{0x0bae, 0x0bb9, 1}, + Range{0x0bbe, 0x0bc2, 1}, + Range{0x0bc6, 0x0bc8, 1}, + Range{0x0bca, 0x0bcd, 1}, + Range{0x0bd0, 0x0bd0, 1}, + Range{0x0bd7, 0x0bd7, 1}, + Range{0x0be6, 0x0bfa, 1}, +} + +// Tagalog is the set of Unicode characters in script Tagalog. +var Tagalog = _Tagalog +var _Tagalog = []Range { + Range{0x1700, 0x170c, 1}, + Range{0x170e, 0x1714, 1}, +} + +// Arabic is the set of Unicode characters in script Arabic. +var Arabic = _Arabic +var _Arabic = []Range { + Range{0x0606, 0x060b, 1}, + Range{0x060d, 0x061a, 1}, + Range{0x061e, 0x061e, 1}, + Range{0x0621, 0x063f, 1}, + Range{0x0641, 0x064a, 1}, + Range{0x0656, 0x065e, 1}, + Range{0x066a, 0x066f, 1}, + Range{0x0671, 0x06dc, 1}, + Range{0x06de, 0x06ff, 1}, + Range{0x0750, 0x077f, 1}, + Range{0xfb50, 0xfbb1, 1}, + Range{0xfbd3, 0xfd3d, 1}, + Range{0xfd50, 0xfd8f, 1}, + Range{0xfd92, 0xfdc7, 1}, + Range{0xfdf0, 0xfdfc, 1}, + Range{0xfe70, 0xfe74, 1}, + Range{0xfe76, 0xfefc, 1}, +} + +// Tagbanwa is the set of Unicode characters in script Tagbanwa. +var Tagbanwa = _Tagbanwa +var _Tagbanwa = []Range { + Range{0x1760, 0x176c, 1}, + Range{0x176e, 0x1770, 1}, + Range{0x1772, 0x1773, 1}, +} + +// Canadian_Aboriginal is the set of Unicode characters in script Canadian_Aboriginal. +var Canadian_Aboriginal = _Canadian_Aboriginal +var _Canadian_Aboriginal = []Range { + Range{0x1401, 0x1676, 1}, +} + +// Tibetan is the set of Unicode characters in script Tibetan. +var Tibetan = _Tibetan +var _Tibetan = []Range { + Range{0x0f00, 0x0f47, 1}, + Range{0x0f49, 0x0f6c, 1}, + Range{0x0f71, 0x0f8b, 1}, + Range{0x0f90, 0x0f97, 1}, + Range{0x0f99, 0x0fbc, 1}, + Range{0x0fbe, 0x0fcc, 1}, + Range{0x0fce, 0x0fd4, 1}, +} + +// Coptic is the set of Unicode characters in script Coptic. +var Coptic = _Coptic +var _Coptic = []Range { + Range{0x03e2, 0x03ef, 1}, + Range{0x2c80, 0x2cea, 1}, + Range{0x2cf9, 0x2cff, 1}, +} + +// Hiragana is the set of Unicode characters in script Hiragana. +var Hiragana = _Hiragana +var _Hiragana = []Range { + Range{0x3041, 0x3096, 1}, + Range{0x309d, 0x309f, 1}, +} + +// Limbu is the set of Unicode characters in script Limbu. +var Limbu = _Limbu +var _Limbu = []Range { + Range{0x1900, 0x191c, 1}, + Range{0x1920, 0x192b, 1}, + Range{0x1930, 0x193b, 1}, + Range{0x1940, 0x1940, 1}, + Range{0x1944, 0x194f, 1}, +} + +// Myanmar is the set of Unicode characters in script Myanmar. +var Myanmar = _Myanmar +var _Myanmar = []Range { + Range{0x1000, 0x1099, 1}, + Range{0x109e, 0x109f, 1}, +} + +// Armenian is the set of Unicode characters in script Armenian. +var Armenian = _Armenian +var _Armenian = []Range { + Range{0x0531, 0x0556, 1}, + Range{0x0559, 0x055f, 1}, + Range{0x0561, 0x0587, 1}, + Range{0x058a, 0x058a, 1}, + Range{0xfb13, 0xfb17, 1}, +} + +// Sinhala is the set of Unicode characters in script Sinhala. +var Sinhala = _Sinhala +var _Sinhala = []Range { + Range{0x0d82, 0x0d83, 1}, + Range{0x0d85, 0x0d96, 1}, + Range{0x0d9a, 0x0db1, 1}, + Range{0x0db3, 0x0dbb, 1}, + Range{0x0dbd, 0x0dbd, 1}, + Range{0x0dc0, 0x0dc6, 1}, + Range{0x0dca, 0x0dca, 1}, + Range{0x0dcf, 0x0dd4, 1}, + Range{0x0dd6, 0x0dd6, 1}, + Range{0x0dd8, 0x0ddf, 1}, + Range{0x0df2, 0x0df4, 1}, +} + +// Bengali is the set of Unicode characters in script Bengali. +var Bengali = _Bengali +var _Bengali = []Range { + Range{0x0981, 0x0983, 1}, + Range{0x0985, 0x098c, 1}, + Range{0x098f, 0x0990, 1}, + Range{0x0993, 0x09a8, 1}, + Range{0x09aa, 0x09b0, 1}, + Range{0x09b2, 0x09b2, 1}, + Range{0x09b6, 0x09b9, 1}, + Range{0x09bc, 0x09c4, 1}, + Range{0x09c7, 0x09c8, 1}, + Range{0x09cb, 0x09ce, 1}, + Range{0x09d7, 0x09d7, 1}, + Range{0x09dc, 0x09dd, 1}, + Range{0x09df, 0x09e3, 1}, + Range{0x09e6, 0x09fa, 1}, +} + +// Greek is the set of Unicode characters in script Greek. +var Greek = _Greek +var _Greek = []Range { + Range{0x0370, 0x0373, 1}, + Range{0x0375, 0x0377, 1}, + Range{0x037a, 0x037d, 1}, + Range{0x0384, 0x0384, 1}, + Range{0x0386, 0x0386, 1}, + Range{0x0388, 0x038a, 1}, + Range{0x038c, 0x038c, 1}, + Range{0x038e, 0x03a1, 1}, + Range{0x03a3, 0x03e1, 1}, + Range{0x03f0, 0x03ff, 1}, + Range{0x1d26, 0x1d2a, 1}, + Range{0x1d5d, 0x1d61, 1}, + Range{0x1d66, 0x1d6a, 1}, + Range{0x1dbf, 0x1dbf, 1}, + Range{0x1f00, 0x1f15, 1}, + Range{0x1f18, 0x1f1d, 1}, + Range{0x1f20, 0x1f45, 1}, + Range{0x1f48, 0x1f4d, 1}, + Range{0x1f50, 0x1f57, 1}, + Range{0x1f59, 0x1f59, 1}, + Range{0x1f5b, 0x1f5b, 1}, + Range{0x1f5d, 0x1f5d, 1}, + Range{0x1f5f, 0x1f7d, 1}, + Range{0x1f80, 0x1fb4, 1}, + Range{0x1fb6, 0x1fc4, 1}, + Range{0x1fc6, 0x1fd3, 1}, + Range{0x1fd6, 0x1fdb, 1}, + Range{0x1fdd, 0x1fef, 1}, + Range{0x1ff2, 0x1ff4, 1}, + Range{0x1ff6, 0x1ffe, 1}, + Range{0x2126, 0x2126, 1}, + Range{0x10140, 0x1018a, 1}, + Range{0x1d200, 0x1d245, 1}, +} + +// Cham is the set of Unicode characters in script Cham. +var Cham = _Cham +var _Cham = []Range { + Range{0xaa00, 0xaa36, 1}, + Range{0xaa40, 0xaa4d, 1}, + Range{0xaa50, 0xaa59, 1}, + Range{0xaa5c, 0xaa5f, 1}, +} + +// Hebrew is the set of Unicode characters in script Hebrew. +var Hebrew = _Hebrew +var _Hebrew = []Range { + Range{0x0591, 0x05c7, 1}, + Range{0x05d0, 0x05ea, 1}, + Range{0x05f0, 0x05f4, 1}, + Range{0xfb1d, 0xfb36, 1}, + Range{0xfb38, 0xfb3c, 1}, + Range{0xfb3e, 0xfb3e, 1}, + Range{0xfb40, 0xfb41, 1}, + Range{0xfb43, 0xfb44, 1}, + Range{0xfb46, 0xfb4f, 1}, +} + +// Saurashtra is the set of Unicode characters in script Saurashtra. +var Saurashtra = _Saurashtra +var _Saurashtra = []Range { + Range{0xa880, 0xa8c4, 1}, + Range{0xa8ce, 0xa8d9, 1}, +} + +// Hangul is the set of Unicode characters in script Hangul. +var Hangul = _Hangul +var _Hangul = []Range { + Range{0x1100, 0x1159, 1}, + Range{0x115f, 0x11a2, 1}, + Range{0x11a8, 0x11f9, 1}, + Range{0x3131, 0x318e, 1}, + Range{0x3200, 0x321e, 1}, + Range{0x3260, 0x327e, 1}, + Range{0xac00, 0xd7a3, 1}, + Range{0xffa0, 0xffbe, 1}, + Range{0xffc2, 0xffc7, 1}, + Range{0xffca, 0xffcf, 1}, + Range{0xffd2, 0xffd7, 1}, + Range{0xffda, 0xffdc, 1}, +} + +// Runic is the set of Unicode characters in script Runic. +var Runic = _Runic +var _Runic = []Range { + Range{0x16a0, 0x16ea, 1}, + Range{0x16ee, 0x16f0, 1}, +} + +// Deseret is the set of Unicode characters in script Deseret. +var Deseret = _Deseret +var _Deseret = []Range { + Range{0x10400, 0x1044f, 1}, +} + +// Sundanese is the set of Unicode characters in script Sundanese. +var Sundanese = _Sundanese +var _Sundanese = []Range { + Range{0x1b80, 0x1baa, 1}, + Range{0x1bae, 0x1bb9, 1}, +} + +// Glagolitic is the set of Unicode characters in script Glagolitic. +var Glagolitic = _Glagolitic +var _Glagolitic = []Range { + Range{0x2c00, 0x2c2e, 1}, + Range{0x2c30, 0x2c5e, 1}, +} + +// Oriya is the set of Unicode characters in script Oriya. +var Oriya = _Oriya +var _Oriya = []Range { + Range{0x0b01, 0x0b03, 1}, + Range{0x0b05, 0x0b0c, 1}, + Range{0x0b0f, 0x0b10, 1}, + Range{0x0b13, 0x0b28, 1}, + Range{0x0b2a, 0x0b30, 1}, + Range{0x0b32, 0x0b33, 1}, + Range{0x0b35, 0x0b39, 1}, + Range{0x0b3c, 0x0b44, 1}, + Range{0x0b47, 0x0b48, 1}, + Range{0x0b4b, 0x0b4d, 1}, + Range{0x0b56, 0x0b57, 1}, + Range{0x0b5c, 0x0b5d, 1}, + Range{0x0b5f, 0x0b63, 1}, + Range{0x0b66, 0x0b71, 1}, +} + +// Buhid is the set of Unicode characters in script Buhid. +var Buhid = _Buhid +var _Buhid = []Range { + Range{0x1740, 0x1753, 1}, +} + +// Ethiopic is the set of Unicode characters in script Ethiopic. +var Ethiopic = _Ethiopic +var _Ethiopic = []Range { + Range{0x1200, 0x1248, 1}, + Range{0x124a, 0x124d, 1}, + Range{0x1250, 0x1256, 1}, + Range{0x1258, 0x1258, 1}, + Range{0x125a, 0x125d, 1}, + Range{0x1260, 0x1288, 1}, + Range{0x128a, 0x128d, 1}, + Range{0x1290, 0x12b0, 1}, + Range{0x12b2, 0x12b5, 1}, + Range{0x12b8, 0x12be, 1}, + Range{0x12c0, 0x12c0, 1}, + Range{0x12c2, 0x12c5, 1}, + Range{0x12c8, 0x12d6, 1}, + Range{0x12d8, 0x1310, 1}, + Range{0x1312, 0x1315, 1}, + Range{0x1318, 0x135a, 1}, + Range{0x135f, 0x137c, 1}, + Range{0x1380, 0x1399, 1}, + Range{0x2d80, 0x2d96, 1}, + Range{0x2da0, 0x2da6, 1}, + Range{0x2da8, 0x2dae, 1}, + Range{0x2db0, 0x2db6, 1}, + Range{0x2db8, 0x2dbe, 1}, + Range{0x2dc0, 0x2dc6, 1}, + Range{0x2dc8, 0x2dce, 1}, + Range{0x2dd0, 0x2dd6, 1}, + Range{0x2dd8, 0x2dde, 1}, +} + +// Syloti_Nagri is the set of Unicode characters in script Syloti_Nagri. +var Syloti_Nagri = _Syloti_Nagri +var _Syloti_Nagri = []Range { + Range{0xa800, 0xa82b, 1}, +} + +// Vai is the set of Unicode characters in script Vai. +var Vai = _Vai +var _Vai = []Range { + Range{0xa500, 0xa62b, 1}, +} + +// Cherokee is the set of Unicode characters in script Cherokee. +var Cherokee = _Cherokee +var _Cherokee = []Range { + Range{0x13a0, 0x13f4, 1}, +} + +// Ogham is the set of Unicode characters in script Ogham. +var Ogham = _Ogham +var _Ogham = []Range { + Range{0x1680, 0x169c, 1}, +} + +// Syriac is the set of Unicode characters in script Syriac. +var Syriac = _Syriac +var _Syriac = []Range { + Range{0x0700, 0x070d, 1}, + Range{0x070f, 0x074a, 1}, + Range{0x074d, 0x074f, 1}, +} + +// Gurmukhi is the set of Unicode characters in script Gurmukhi. +var Gurmukhi = _Gurmukhi +var _Gurmukhi = []Range { + Range{0x0a01, 0x0a03, 1}, + Range{0x0a05, 0x0a0a, 1}, + Range{0x0a0f, 0x0a10, 1}, + Range{0x0a13, 0x0a28, 1}, + Range{0x0a2a, 0x0a30, 1}, + Range{0x0a32, 0x0a33, 1}, + Range{0x0a35, 0x0a36, 1}, + Range{0x0a38, 0x0a39, 1}, + Range{0x0a3c, 0x0a3c, 1}, + Range{0x0a3e, 0x0a42, 1}, + Range{0x0a47, 0x0a48, 1}, + Range{0x0a4b, 0x0a4d, 1}, + Range{0x0a51, 0x0a51, 1}, + Range{0x0a59, 0x0a5c, 1}, + Range{0x0a5e, 0x0a5e, 1}, + Range{0x0a66, 0x0a75, 1}, +} + +// Ol_Chiki is the set of Unicode characters in script Ol_Chiki. +var Ol_Chiki = _Ol_Chiki +var _Ol_Chiki = []Range { + Range{0x1c50, 0x1c7f, 1}, +} + +// Mongolian is the set of Unicode characters in script Mongolian. +var Mongolian = _Mongolian +var _Mongolian = []Range { + Range{0x1800, 0x1801, 1}, + Range{0x1804, 0x1804, 1}, + Range{0x1806, 0x180e, 1}, + Range{0x1810, 0x1819, 1}, + Range{0x1820, 0x1877, 1}, + Range{0x1880, 0x18aa, 1}, +} + +// Hanunoo is the set of Unicode characters in script Hanunoo. +var Hanunoo = _Hanunoo +var _Hanunoo = []Range { + Range{0x1720, 0x1734, 1}, +} + +// Cypriot is the set of Unicode characters in script Cypriot. +var Cypriot = _Cypriot +var _Cypriot = []Range { + Range{0x10800, 0x10805, 1}, + Range{0x10808, 0x10808, 1}, + Range{0x1080a, 0x10835, 1}, + Range{0x10837, 0x10838, 1}, + Range{0x1083c, 0x1083c, 1}, + Range{0x1083f, 0x1083f, 1}, +} + +// Buginese is the set of Unicode characters in script Buginese. +var Buginese = _Buginese +var _Buginese = []Range { + Range{0x1a00, 0x1a1b, 1}, + Range{0x1a1e, 0x1a1f, 1}, +} + +// Lepcha is the set of Unicode characters in script Lepcha. +var Lepcha = _Lepcha +var _Lepcha = []Range { + Range{0x1c00, 0x1c37, 1}, + Range{0x1c3b, 0x1c49, 1}, + Range{0x1c4d, 0x1c4f, 1}, +} + +// Thaana is the set of Unicode characters in script Thaana. +var Thaana = _Thaana +var _Thaana = []Range { + Range{0x0780, 0x07b1, 1}, +} + +// Old_Persian is the set of Unicode characters in script Old_Persian. +var Old_Persian = _Old_Persian +var _Old_Persian = []Range { + Range{0x103a0, 0x103c3, 1}, + Range{0x103c8, 0x103d5, 1}, +} + +// Cuneiform is the set of Unicode characters in script Cuneiform. +var Cuneiform = _Cuneiform +var _Cuneiform = []Range { + Range{0x12000, 0x1236e, 1}, + Range{0x12400, 0x12462, 1}, + Range{0x12470, 0x12473, 1}, +} + +// Rejang is the set of Unicode characters in script Rejang. +var Rejang = _Rejang +var _Rejang = []Range { + Range{0xa930, 0xa953, 1}, + Range{0xa95f, 0xa95f, 1}, +} + +// Georgian is the set of Unicode characters in script Georgian. +var Georgian = _Georgian +var _Georgian = []Range { + Range{0x10a0, 0x10c5, 1}, + Range{0x10d0, 0x10fa, 1}, + Range{0x10fc, 0x10fc, 1}, + Range{0x2d00, 0x2d25, 1}, +} + +// Shavian is the set of Unicode characters in script Shavian. +var Shavian = _Shavian +var _Shavian = []Range { + Range{0x10450, 0x1047f, 1}, +} + +// Lycian is the set of Unicode characters in script Lycian. +var Lycian = _Lycian +var _Lycian = []Range { + Range{0x10280, 0x1029c, 1}, +} + +// Nko is the set of Unicode characters in script Nko. +var Nko = _Nko +var _Nko = []Range { + Range{0x07c0, 0x07fa, 1}, +} + +// Yi is the set of Unicode characters in script Yi. +var Yi = _Yi +var _Yi = []Range { + Range{0xa000, 0xa48c, 1}, + Range{0xa490, 0xa4c6, 1}, +} + +// Lao is the set of Unicode characters in script Lao. +var Lao = _Lao +var _Lao = []Range { + Range{0x0e81, 0x0e82, 1}, + Range{0x0e84, 0x0e84, 1}, + Range{0x0e87, 0x0e88, 1}, + Range{0x0e8a, 0x0e8a, 1}, + Range{0x0e8d, 0x0e8d, 1}, + Range{0x0e94, 0x0e97, 1}, + Range{0x0e99, 0x0e9f, 1}, + Range{0x0ea1, 0x0ea3, 1}, + Range{0x0ea5, 0x0ea5, 1}, + Range{0x0ea7, 0x0ea7, 1}, + Range{0x0eaa, 0x0eab, 1}, + Range{0x0ead, 0x0eb9, 1}, + Range{0x0ebb, 0x0ebd, 1}, + Range{0x0ec0, 0x0ec4, 1}, + Range{0x0ec6, 0x0ec6, 1}, + Range{0x0ec8, 0x0ecd, 1}, + Range{0x0ed0, 0x0ed9, 1}, + Range{0x0edc, 0x0edd, 1}, +} + +// Linear_B is the set of Unicode characters in script Linear_B. +var Linear_B = _Linear_B +var _Linear_B = []Range { + Range{0x10000, 0x1000b, 1}, + Range{0x1000d, 0x10026, 1}, + Range{0x10028, 0x1003a, 1}, + Range{0x1003c, 0x1003d, 1}, + Range{0x1003f, 0x1004d, 1}, + Range{0x10050, 0x1005d, 1}, + Range{0x10080, 0x100fa, 1}, +} + +// Old_Italic is the set of Unicode characters in script Old_Italic. +var Old_Italic = _Old_Italic +var _Old_Italic = []Range { + Range{0x10300, 0x1031e, 1}, + Range{0x10320, 0x10323, 1}, +} + +// Devanagari is the set of Unicode characters in script Devanagari. +var Devanagari = _Devanagari +var _Devanagari = []Range { + Range{0x0901, 0x0939, 1}, + Range{0x093c, 0x094d, 1}, + Range{0x0950, 0x0950, 1}, + Range{0x0953, 0x0954, 1}, + Range{0x0958, 0x0963, 1}, + Range{0x0966, 0x096f, 1}, + Range{0x0971, 0x0972, 1}, + Range{0x097b, 0x097f, 1}, +} + +// Lydian is the set of Unicode characters in script Lydian. +var Lydian = _Lydian +var _Lydian = []Range { + Range{0x10920, 0x10939, 1}, + Range{0x1093f, 0x1093f, 1}, +} + +// Tifinagh is the set of Unicode characters in script Tifinagh. +var Tifinagh = _Tifinagh +var _Tifinagh = []Range { + Range{0x2d30, 0x2d65, 1}, + Range{0x2d6f, 0x2d6f, 1}, +} + +// Ugaritic is the set of Unicode characters in script Ugaritic. +var Ugaritic = _Ugaritic +var _Ugaritic = []Range { + Range{0x10380, 0x1039d, 1}, + Range{0x1039f, 0x1039f, 1}, +} + +// Thai is the set of Unicode characters in script Thai. +var Thai = _Thai +var _Thai = []Range { + Range{0x0e01, 0x0e3a, 1}, + Range{0x0e40, 0x0e5b, 1}, +} + +// Cyrillic is the set of Unicode characters in script Cyrillic. +var Cyrillic = _Cyrillic +var _Cyrillic = []Range { + Range{0x0400, 0x0523, 1}, + Range{0x1d2b, 0x1d2b, 1}, + Range{0x1d78, 0x1d78, 1}, + Range{0x2de0, 0x2dff, 1}, + Range{0xa640, 0xa65f, 1}, + Range{0xa662, 0xa673, 1}, + Range{0xa67c, 0xa697, 1}, +} + +// Gujarati is the set of Unicode characters in script Gujarati. +var Gujarati = _Gujarati +var _Gujarati = []Range { + Range{0x0a81, 0x0a83, 1}, + Range{0x0a85, 0x0a8d, 1}, + Range{0x0a8f, 0x0a91, 1}, + Range{0x0a93, 0x0aa8, 1}, + Range{0x0aaa, 0x0ab0, 1}, + Range{0x0ab2, 0x0ab3, 1}, + Range{0x0ab5, 0x0ab9, 1}, + Range{0x0abc, 0x0ac5, 1}, + Range{0x0ac7, 0x0ac9, 1}, + Range{0x0acb, 0x0acd, 1}, + Range{0x0ad0, 0x0ad0, 1}, + Range{0x0ae0, 0x0ae3, 1}, + Range{0x0ae6, 0x0aef, 1}, + Range{0x0af1, 0x0af1, 1}, +} + +// Carian is the set of Unicode characters in script Carian. +var Carian = _Carian +var _Carian = []Range { + Range{0x102a0, 0x102d0, 1}, +} + +// Phoenician is the set of Unicode characters in script Phoenician. +var Phoenician = _Phoenician +var _Phoenician = []Range { + Range{0x10900, 0x10919, 1}, + Range{0x1091f, 0x1091f, 1}, +} + +// Balinese is the set of Unicode characters in script Balinese. +var Balinese = _Balinese +var _Balinese = []Range { + Range{0x1b00, 0x1b4b, 1}, + Range{0x1b50, 0x1b7c, 1}, +} + +// Braille is the set of Unicode characters in script Braille. +var Braille = _Braille +var _Braille = []Range { + Range{0x2800, 0x28ff, 1}, +} + +// Han is the set of Unicode characters in script Han. +var Han = _Han +var _Han = []Range { + Range{0x2e80, 0x2e99, 1}, + Range{0x2e9b, 0x2ef3, 1}, + Range{0x2f00, 0x2fd5, 1}, + Range{0x3005, 0x3005, 1}, + Range{0x3007, 0x3007, 1}, + Range{0x3021, 0x3029, 1}, + Range{0x3038, 0x303b, 1}, + Range{0x3400, 0x4db5, 1}, + Range{0x4e00, 0x9fc3, 1}, + Range{0xf900, 0xfa2d, 1}, + Range{0xfa30, 0xfa6a, 1}, + Range{0xfa70, 0xfad9, 1}, + Range{0x20000, 0x2a6d6, 1}, + Range{0x2f800, 0x2fa1d, 1}, +} + +// Gothic is the set of Unicode characters in script Gothic. +var Gothic = _Gothic +var _Gothic = []Range { + Range{0x10330, 0x1034a, 1}, +} +