]> Cypherpunks repositories - gostls13.git/commitdiff
generate the unicode tables directly from web database
authorRob Pike <r@golang.org>
Wed, 26 Aug 2009 23:01:31 +0000 (16:01 -0700)
committerRob Pike <r@golang.org>
Wed, 26 Aug 2009 23:01:31 +0000 (16:01 -0700)
after this CL, two more to come:
1) add an exhaustive test, probably as a variant of maketables
2) add ToUpper, ToLower, ToTitle and associated tests

R=rsc
DELTA=1578  (1007 added, 559 deleted, 12 changed)
OCL=33902
CL=33907

src/pkg/regexp/regexp.go
src/pkg/unicode/Makefile
src/pkg/unicode/decimaldigit.go
src/pkg/unicode/digittables.go [new file with mode: 0644]
src/pkg/unicode/letter.go
src/pkg/unicode/letter_test.go
src/pkg/unicode/lettertables.go [new file with mode: 0644]
src/pkg/unicode/maketables.go [new file with mode: 0644]

index 15361a2f7c2c17f4960fe8318974cb5a56c9f41f..55c8a6325f3724ad7b10938108c57e6c7887977d 100644 (file)
@@ -68,7 +68,7 @@ func (c *common) setNext(i instr) { c._next = i }
 func (c *common) index() int { return c._index }
 func (c *common) setIndex(i int) { c._index = i }
 
-// The representation of a compiled regular expression.
+// Regexp is the representation of a compiled regular expression.
 // The public interface is entirely through methods.
 type Regexp struct {
        expr    string; // the original expression
index 7ab29c6c4b19fd1717bd0ecc5646f9986e58fa64..da31830399418c69153a650d2e518dd351c53141 100644 (file)
@@ -7,6 +7,15 @@ include $(GOROOT)/src/Make.$(GOARCH)
 TARG=unicode
 GOFILES=\
        decimaldigit.go\
+       digittables.go\
        letter.go\
+       lettertables.go\
 
 include $(GOROOT)/src/Make.pkg
+
+tables:
+       $(GC) maketables.go
+       $(LD) -o maketables maketables.$O
+       maketables --digits > digittables.go
+       maketables > lettertables.go
+       rm -f maketables
index ab65b433d4f7d5b324c34d45cb777070b77d4fb3..b6b438812e678e5ed911d60c8e9f4a197b763551 100644 (file)
@@ -4,46 +4,6 @@
 
 package unicode
 
-// TODO: Generated by hand starting with
-// http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
-// These ranges are the characters with the third field "Nd".
-// Should generate automatically etc.
-
-// Decimal digit is the set of Unicode characters with the "decimal digit" property.
-var DecimalDigit = []Range{
-       Range{0x0030, 0x0039, 1},
-       Range{0x0660, 0x0669, 1},
-       Range{0x06F0, 0x06F9, 1},
-       Range{0x07C0, 0x07C9, 1},
-       Range{0x0966, 0x096F, 1},
-       Range{0x09E6, 0x09EF, 1},
-       Range{0x0A66, 0x0A6F, 1},
-       Range{0x0AE6, 0x0AEF, 1},
-       Range{0x0B66, 0x0B6F, 1},
-       Range{0x0BE6, 0x0BEF, 1},
-       Range{0x0C66, 0x0C6F, 1},
-       Range{0x0CE6, 0x0CEF, 1},
-       Range{0x0D66, 0x0D6F, 1},
-       Range{0x0E50, 0x0E59, 1},
-       Range{0x0ED0, 0x0ED9, 1},
-       Range{0x0F20, 0x0F29, 1},
-       Range{0x1040, 0x1049, 1},
-       Range{0x1090, 0x1099, 1},
-       Range{0x17E0, 0x17E9, 1},
-       Range{0x1810, 0x1819, 1},
-       Range{0x1946, 0x194F, 1},
-       Range{0x19D0, 0x19D9, 1},
-       Range{0x1B50, 0x1B59, 1},
-       Range{0x1BB0, 0x1BB9, 1},
-       Range{0x1C40, 0x1C49, 1},
-       Range{0x1C50, 0x1C59, 1},
-       Range{0xA620, 0xA629, 1},
-       Range{0xA8D0, 0xA8D9, 1},
-       Range{0xA900, 0xA909, 1},
-       Range{0xAA50, 0xAA59, 1},
-       Range{0xFF10, 0xFF19, 1},
-}
-
 // IsDecimalDigit reports whether the rune is a decimal digit.
 func IsDecimalDigit(rune int) bool {
        return Is(DecimalDigit, rune);
diff --git a/src/pkg/unicode/digittables.go b/src/pkg/unicode/digittables.go
new file mode 100644 (file)
index 0000000..f0b11a5
--- /dev/null
@@ -0,0 +1,43 @@
+// Generated by running
+//     tables --digits=true --url=http://www.unicode.org/Public/5.1.0/ucd/UnicodeData.txt
+// DO NOT EDIT
+
+package unicode
+
+// DecimalDigit is the set of Unicode characters with the "decimal digit" property.
+var DecimalDigit = decimalDigit
+var decimalDigit = []Range {
+       Range{0x0030, 0x0039, 1},
+       Range{0x0660, 0x0669, 1},
+       Range{0x06f0, 0x06f9, 1},
+       Range{0x07c0, 0x07c9, 1},
+       Range{0x0966, 0x096f, 1},
+       Range{0x09e6, 0x09ef, 1},
+       Range{0x0a66, 0x0a6f, 1},
+       Range{0x0ae6, 0x0aef, 1},
+       Range{0x0b66, 0x0b6f, 1},
+       Range{0x0be6, 0x0bef, 1},
+       Range{0x0c66, 0x0c6f, 1},
+       Range{0x0ce6, 0x0cef, 1},
+       Range{0x0d66, 0x0d6f, 1},
+       Range{0x0e50, 0x0e59, 1},
+       Range{0x0ed0, 0x0ed9, 1},
+       Range{0x0f20, 0x0f29, 1},
+       Range{0x1040, 0x1049, 1},
+       Range{0x1090, 0x1099, 1},
+       Range{0x17e0, 0x17e9, 1},
+       Range{0x1810, 0x1819, 1},
+       Range{0x1946, 0x194f, 1},
+       Range{0x19d0, 0x19d9, 1},
+       Range{0x1b50, 0x1b59, 1},
+       Range{0x1bb0, 0x1bb9, 1},
+       Range{0x1c40, 0x1c49, 1},
+       Range{0x1c50, 0x1c59, 1},
+       Range{0xa620, 0xa629, 1},
+       Range{0xa8d0, 0xa8d9, 1},
+       Range{0xa900, 0xa909, 1},
+       Range{0xaa50, 0xaa59, 1},
+       Range{0xff10, 0xff19, 1},
+       Range{0x104a0, 0x104a9, 1},
+       Range{0x1d7ce, 0x1d7ff, 1},
+}
index d7aa678c9d25e134630893ab5317a243a9a0881d..f0e9db2ae54b2bba89a98f84a83739ff67763aaa 100644 (file)
@@ -2,14 +2,6 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Unicode table access.  A quick start for now.
-
-// TODO: Generated by hand.
-// Should generate automatically from Unicode
-// tables, expand to other properties, split into files,
-// link in only the tables that are used by the program,
-// etc.
-
 // This package provides data and functions to test some properties of Unicode code points.
 // It is rudimentary but will improve.
 package unicode
@@ -22,515 +14,6 @@ type Range struct {
        Stride int;
 }
 
-// Upper is the set of Unicode upper case letters.
-var Upper = []Range{
-       Range{0x0041, 0x005a, 1},
-       Range{0x00c0, 0x00d6, 1},
-       Range{0x00d8, 0x00de, 1},
-       Range{0x0100, 0x0136, 2},
-       Range{0x0139, 0x0147, 2},
-       Range{0x014a, 0x0176, 2},
-       Range{0x0178, 0x0179, 1},
-       Range{0x017b, 0x017d, 2},
-       Range{0x0181, 0x0182, 1},
-       Range{0x0184, 0x0184, 1},
-       Range{0x0186, 0x0187, 1},
-       Range{0x0189, 0x018b, 1},
-       Range{0x018e, 0x0191, 1},
-       Range{0x0193, 0x0194, 1},
-       Range{0x0196, 0x0198, 1},
-       Range{0x019c, 0x019d, 1},
-       Range{0x019f, 0x01a0, 1},
-       Range{0x01a2, 0x01a4, 2},
-       Range{0x01a6, 0x01a7, 1},
-       Range{0x01a9, 0x01ac, 3},
-       Range{0x01ae, 0x01af, 1},
-       Range{0x01b1, 0x01b3, 1},
-       Range{0x01b5, 0x01b5, 1},
-       Range{0x01b7, 0x01b8, 1},
-       Range{0x01bc, 0x01c4, 8},
-       Range{0x01c7, 0x01cd, 3},
-       Range{0x01cf, 0x01db, 2},
-       Range{0x01de, 0x01ee, 2},
-       Range{0x01f1, 0x01f4, 3},
-       Range{0x01f6, 0x01f8, 1},
-       Range{0x01fa, 0x0232, 2},
-       Range{0x023a, 0x023b, 1},
-       Range{0x023d, 0x023e, 1},
-       Range{0x0241, 0x0241, 1},
-       Range{0x0243, 0x0246, 1},
-       Range{0x0248, 0x024e, 2},
-       Range{0x0370, 0x0372, 2},
-       Range{0x0376, 0x0386, 16},
-       Range{0x0388, 0x038a, 1},
-       Range{0x038c, 0x038c, 1},
-       Range{0x038e, 0x038f, 1},
-       Range{0x0391, 0x03a1, 1},
-       Range{0x03a3, 0x03ab, 1},
-       Range{0x03cf, 0x03cf, 1},
-       Range{0x03d2, 0x03d4, 1},
-       Range{0x03d8, 0x03ee, 2},
-       Range{0x03f4, 0x03f7, 3},
-       Range{0x03f9, 0x03fa, 1},
-       Range{0x03fd, 0x042f, 1},
-       Range{0x0460, 0x0480, 2},
-       Range{0x048a, 0x04be, 2},
-       Range{0x04c0, 0x04c1, 1},
-       Range{0x04c3, 0x04cd, 2},
-       Range{0x04d0, 0x0522, 2},
-       Range{0x0531, 0x0556, 1},
-       Range{0x10a0, 0x10c5, 1},
-       Range{0x1e00, 0x1e94, 2},
-       Range{0x1e9e, 0x1efe, 2},
-       Range{0x1f08, 0x1f0f, 1},
-       Range{0x1f18, 0x1f1d, 1},
-       Range{0x1f28, 0x1f2f, 1},
-       Range{0x1f38, 0x1f3f, 1},
-       Range{0x1f48, 0x1f4d, 1},
-       Range{0x1f59, 0x1f5f, 2},
-       Range{0x1f68, 0x1f6f, 1},
-       Range{0x1fb8, 0x1fbb, 1},
-       Range{0x1fc8, 0x1fcb, 1},
-       Range{0x1fd8, 0x1fdb, 1},
-       Range{0x1fe8, 0x1fec, 1},
-       Range{0x1ff8, 0x1ffb, 1},
-       Range{0x2102, 0x2107, 5},
-       Range{0x210b, 0x210d, 1},
-       Range{0x2110, 0x2112, 1},
-       Range{0x2115, 0x2115, 1},
-       Range{0x2119, 0x211d, 1},
-       Range{0x2124, 0x2128, 2},
-       Range{0x212a, 0x212d, 1},
-       Range{0x2130, 0x2133, 1},
-       Range{0x213e, 0x213f, 1},
-       Range{0x2145, 0x2183, 62},
-       Range{0x2c00, 0x2c2e, 1},
-       Range{0x2c60, 0x2c60, 1},
-       Range{0x2c62, 0x2c64, 1},
-       Range{0x2c67, 0x2c6b, 2},
-       Range{0x2c6d, 0x2c6f, 1},
-       Range{0x2c72, 0x2c75, 3},
-       Range{0x2c80, 0x2ce2, 2},
-       Range{0xa640, 0xa65e, 2},
-       Range{0xa662, 0xa66c, 2},
-       Range{0xa680, 0xa696, 2},
-       Range{0xa722, 0xa72e, 2},
-       Range{0xa732, 0xa76e, 2},
-       Range{0xa779, 0xa77b, 2},
-       Range{0xa77d, 0xa77e, 1},
-       Range{0xa780, 0xa786, 2},
-       Range{0xa78b, 0xa78b, 1},
-       Range{0xff21, 0xff3a, 1},
-       Range{0x10400, 0x10427, 1},
-       Range{0x1d400, 0x1d419, 1},
-       Range{0x1d434, 0x1d44d, 1},
-       Range{0x1d468, 0x1d481, 1},
-       Range{0x1d49c, 0x1d49c, 1},
-       Range{0x1d49e, 0x1d49f, 1},
-       Range{0x1d4a2, 0x1d4a2, 1},
-       Range{0x1d4a5, 0x1d4a6, 1},
-       Range{0x1d4a9, 0x1d4ac, 1},
-       Range{0x1d4ae, 0x1d4b5, 1},
-       Range{0x1d4d0, 0x1d4e9, 1},
-       Range{0x1d504, 0x1d505, 1},
-       Range{0x1d507, 0x1d50a, 1},
-       Range{0x1d50d, 0x1d514, 1},
-       Range{0x1d516, 0x1d51c, 1},
-       Range{0x1d538, 0x1d539, 1},
-       Range{0x1d53b, 0x1d53e, 1},
-       Range{0x1d540, 0x1d544, 1},
-       Range{0x1d546, 0x1d546, 1},
-       Range{0x1d54a, 0x1d550, 1},
-       Range{0x1d56c, 0x1d585, 1},
-       Range{0x1d5a0, 0x1d5b9, 1},
-       Range{0x1d5d4, 0x1d5ed, 1},
-       Range{0x1d608, 0x1d621, 1},
-       Range{0x1d63c, 0x1d655, 1},
-       Range{0x1d670, 0x1d689, 1},
-       Range{0x1d6a8, 0x1d6c0, 1},
-       Range{0x1d6e2, 0x1d6fa, 1},
-       Range{0x1d71c, 0x1d734, 1},
-       Range{0x1d756, 0x1d76e, 1},
-       Range{0x1d790, 0x1d7a8, 1},
-       Range{0x1d7ca, 0x1d7ca, 1},
-}
-
-// Letter is the set of Unicode letters.
-var Letter = []Range {
-       Range{0x0041, 0x005a, 1},
-       Range{0x0061, 0x007a, 1},
-       Range{0x00aa, 0x00b5, 11},
-       Range{0x00ba, 0x00ba, 1},
-       Range{0x00c0, 0x00d6, 1},
-       Range{0x00d8, 0x00f6, 1},
-       Range{0x00f8, 0x02c1, 1},
-       Range{0x02c6, 0x02d1, 1},
-       Range{0x02e0, 0x02e4, 1},
-       Range{0x02ec, 0x02ee, 2},
-       Range{0x0370, 0x0374, 1},
-       Range{0x0376, 0x0377, 1},
-       Range{0x037a, 0x037d, 1},
-       Range{0x0386, 0x0386, 1},
-       Range{0x0388, 0x038a, 1},
-       Range{0x038c, 0x038c, 1},
-       Range{0x038e, 0x03a1, 1},
-       Range{0x03a3, 0x03f5, 1},
-       Range{0x03f7, 0x0481, 1},
-       Range{0x048a, 0x0523, 1},
-       Range{0x0531, 0x0556, 1},
-       Range{0x0559, 0x0559, 1},
-       Range{0x0561, 0x0587, 1},
-       Range{0x05d0, 0x05ea, 1},
-       Range{0x05f0, 0x05f2, 1},
-       Range{0x0621, 0x064a, 1},
-       Range{0x066e, 0x066f, 1},
-       Range{0x0671, 0x06d3, 1},
-       Range{0x06d5, 0x06d5, 1},
-       Range{0x06e5, 0x06e6, 1},
-       Range{0x06ee, 0x06ef, 1},
-       Range{0x06fa, 0x06fc, 1},
-       Range{0x06ff, 0x0710, 17},
-       Range{0x0712, 0x072f, 1},
-       Range{0x074d, 0x07a5, 1},
-       Range{0x07b1, 0x07b1, 1},
-       Range{0x07ca, 0x07ea, 1},
-       Range{0x07f4, 0x07f5, 1},
-       Range{0x07fa, 0x07fa, 1},
-       Range{0x0904, 0x0939, 1},
-       Range{0x093d, 0x0950, 19},
-       Range{0x0958, 0x0961, 1},
-       Range{0x0971, 0x0972, 1},
-       Range{0x097b, 0x097f, 1},
-       Range{0x0985, 0x098c, 1},
-       Range{0x098f, 0x0990, 1},
-       Range{0x0993, 0x09a8, 1},
-       Range{0x09aa, 0x09b0, 1},
-       Range{0x09b2, 0x09b2, 1},
-       Range{0x09b6, 0x09b9, 1},
-       Range{0x09bd, 0x09ce, 17},
-       Range{0x09dc, 0x09dd, 1},
-       Range{0x09df, 0x09e1, 1},
-       Range{0x09f0, 0x09f1, 1},
-       Range{0x0a05, 0x0a0a, 1},
-       Range{0x0a0f, 0x0a10, 1},
-       Range{0x0a13, 0x0a28, 1},
-       Range{0x0a2a, 0x0a30, 1},
-       Range{0x0a32, 0x0a33, 1},
-       Range{0x0a35, 0x0a36, 1},
-       Range{0x0a38, 0x0a39, 1},
-       Range{0x0a59, 0x0a5c, 1},
-       Range{0x0a5e, 0x0a5e, 1},
-       Range{0x0a72, 0x0a74, 1},
-       Range{0x0a85, 0x0a8d, 1},
-       Range{0x0a8f, 0x0a91, 1},
-       Range{0x0a93, 0x0aa8, 1},
-       Range{0x0aaa, 0x0ab0, 1},
-       Range{0x0ab2, 0x0ab3, 1},
-       Range{0x0ab5, 0x0ab9, 1},
-       Range{0x0abd, 0x0ad0, 19},
-       Range{0x0ae0, 0x0ae1, 1},
-       Range{0x0b05, 0x0b0c, 1},
-       Range{0x0b0f, 0x0b10, 1},
-       Range{0x0b13, 0x0b28, 1},
-       Range{0x0b2a, 0x0b30, 1},
-       Range{0x0b32, 0x0b33, 1},
-       Range{0x0b35, 0x0b39, 1},
-       Range{0x0b3d, 0x0b3d, 1},
-       Range{0x0b5c, 0x0b5d, 1},
-       Range{0x0b5f, 0x0b61, 1},
-       Range{0x0b71, 0x0b83, 18},
-       Range{0x0b85, 0x0b8a, 1},
-       Range{0x0b8e, 0x0b90, 1},
-       Range{0x0b92, 0x0b95, 1},
-       Range{0x0b99, 0x0b9a, 1},
-       Range{0x0b9c, 0x0b9c, 1},
-       Range{0x0b9e, 0x0b9f, 1},
-       Range{0x0ba3, 0x0ba4, 1},
-       Range{0x0ba8, 0x0baa, 1},
-       Range{0x0bae, 0x0bb9, 1},
-       Range{0x0bd0, 0x0bd0, 1},
-       Range{0x0c05, 0x0c0c, 1},
-       Range{0x0c0e, 0x0c10, 1},
-       Range{0x0c12, 0x0c28, 1},
-       Range{0x0c2a, 0x0c33, 1},
-       Range{0x0c35, 0x0c39, 1},
-       Range{0x0c3d, 0x0c3d, 1},
-       Range{0x0c58, 0x0c59, 1},
-       Range{0x0c60, 0x0c61, 1},
-       Range{0x0c85, 0x0c8c, 1},
-       Range{0x0c8e, 0x0c90, 1},
-       Range{0x0c92, 0x0ca8, 1},
-       Range{0x0caa, 0x0cb3, 1},
-       Range{0x0cb5, 0x0cb9, 1},
-       Range{0x0cbd, 0x0cde, 33},
-       Range{0x0ce0, 0x0ce1, 1},
-       Range{0x0d05, 0x0d0c, 1},
-       Range{0x0d0e, 0x0d10, 1},
-       Range{0x0d12, 0x0d28, 1},
-       Range{0x0d2a, 0x0d39, 1},
-       Range{0x0d3d, 0x0d3d, 1},
-       Range{0x0d60, 0x0d61, 1},
-       Range{0x0d7a, 0x0d7f, 1},
-       Range{0x0d85, 0x0d96, 1},
-       Range{0x0d9a, 0x0db1, 1},
-       Range{0x0db3, 0x0dbb, 1},
-       Range{0x0dbd, 0x0dbd, 1},
-       Range{0x0dc0, 0x0dc6, 1},
-       Range{0x0e01, 0x0e30, 1},
-       Range{0x0e32, 0x0e33, 1},
-       Range{0x0e40, 0x0e46, 1},
-       Range{0x0e81, 0x0e82, 1},
-       Range{0x0e84, 0x0e84, 1},
-       Range{0x0e87, 0x0e88, 1},
-       Range{0x0e8a, 0x0e8d, 3},
-       Range{0x0e94, 0x0e97, 1},
-       Range{0x0e99, 0x0e9f, 1},
-       Range{0x0ea1, 0x0ea3, 1},
-       Range{0x0ea5, 0x0ea7, 2},
-       Range{0x0eaa, 0x0eab, 1},
-       Range{0x0ead, 0x0eb0, 1},
-       Range{0x0eb2, 0x0eb3, 1},
-       Range{0x0ebd, 0x0ebd, 1},
-       Range{0x0ec0, 0x0ec4, 1},
-       Range{0x0ec6, 0x0ec6, 1},
-       Range{0x0edc, 0x0edd, 1},
-       Range{0x0f00, 0x0f00, 1},
-       Range{0x0f40, 0x0f47, 1},
-       Range{0x0f49, 0x0f6c, 1},
-       Range{0x0f88, 0x0f8b, 1},
-       Range{0x1000, 0x102a, 1},
-       Range{0x103f, 0x103f, 1},
-       Range{0x1050, 0x1055, 1},
-       Range{0x105a, 0x105d, 1},
-       Range{0x1061, 0x1061, 1},
-       Range{0x1065, 0x1066, 1},
-       Range{0x106e, 0x1070, 1},
-       Range{0x1075, 0x1081, 1},
-       Range{0x108e, 0x108e, 1},
-       Range{0x10a0, 0x10c5, 1},
-       Range{0x10d0, 0x10fa, 1},
-       Range{0x10fc, 0x10fc, 1},
-       Range{0x1100, 0x1159, 1},
-       Range{0x115f, 0x11a2, 1},
-       Range{0x11a8, 0x11f9, 1},
-       Range{0x1200, 0x1248, 1},
-       Range{0x124a, 0x124d, 1},
-       Range{0x1250, 0x1256, 1},
-       Range{0x1258, 0x1258, 1},
-       Range{0x125a, 0x125d, 1},
-       Range{0x1260, 0x1288, 1},
-       Range{0x128a, 0x128d, 1},
-       Range{0x1290, 0x12b0, 1},
-       Range{0x12b2, 0x12b5, 1},
-       Range{0x12b8, 0x12be, 1},
-       Range{0x12c0, 0x12c0, 1},
-       Range{0x12c2, 0x12c5, 1},
-       Range{0x12c8, 0x12d6, 1},
-       Range{0x12d8, 0x1310, 1},
-       Range{0x1312, 0x1315, 1},
-       Range{0x1318, 0x135a, 1},
-       Range{0x1380, 0x138f, 1},
-       Range{0x13a0, 0x13f4, 1},
-       Range{0x1401, 0x166c, 1},
-       Range{0x166f, 0x1676, 1},
-       Range{0x1681, 0x169a, 1},
-       Range{0x16a0, 0x16ea, 1},
-       Range{0x1700, 0x170c, 1},
-       Range{0x170e, 0x1711, 1},
-       Range{0x1720, 0x1731, 1},
-       Range{0x1740, 0x1751, 1},
-       Range{0x1760, 0x176c, 1},
-       Range{0x176e, 0x1770, 1},
-       Range{0x1780, 0x17b3, 1},
-       Range{0x17d7, 0x17dc, 5},
-       Range{0x1820, 0x1877, 1},
-       Range{0x1880, 0x18a8, 1},
-       Range{0x18aa, 0x18aa, 1},
-       Range{0x1900, 0x191c, 1},
-       Range{0x1950, 0x196d, 1},
-       Range{0x1970, 0x1974, 1},
-       Range{0x1980, 0x19a9, 1},
-       Range{0x19c1, 0x19c7, 1},
-       Range{0x1a00, 0x1a16, 1},
-       Range{0x1b05, 0x1b33, 1},
-       Range{0x1b45, 0x1b4b, 1},
-       Range{0x1b83, 0x1ba0, 1},
-       Range{0x1bae, 0x1baf, 1},
-       Range{0x1c00, 0x1c23, 1},
-       Range{0x1c4d, 0x1c4f, 1},
-       Range{0x1c5a, 0x1c7d, 1},
-       Range{0x1d00, 0x1dbf, 1},
-       Range{0x1e00, 0x1f15, 1},
-       Range{0x1f18, 0x1f1d, 1},
-       Range{0x1f20, 0x1f45, 1},
-       Range{0x1f48, 0x1f4d, 1},
-       Range{0x1f50, 0x1f57, 1},
-       Range{0x1f59, 0x1f5d, 2},
-       Range{0x1f5f, 0x1f7d, 1},
-       Range{0x1f80, 0x1fb4, 1},
-       Range{0x1fb6, 0x1fbc, 1},
-       Range{0x1fbe, 0x1fbe, 1},
-       Range{0x1fc2, 0x1fc4, 1},
-       Range{0x1fc6, 0x1fcc, 1},
-       Range{0x1fd0, 0x1fd3, 1},
-       Range{0x1fd6, 0x1fdb, 1},
-       Range{0x1fe0, 0x1fec, 1},
-       Range{0x1ff2, 0x1ff4, 1},
-       Range{0x1ff6, 0x1ffc, 1},
-       Range{0x2071, 0x207f, 14},
-       Range{0x2090, 0x2094, 1},
-       Range{0x2102, 0x2107, 5},
-       Range{0x210a, 0x2113, 1},
-       Range{0x2115, 0x2115, 1},
-       Range{0x2119, 0x211d, 1},
-       Range{0x2124, 0x2128, 2},
-       Range{0x212a, 0x212d, 1},
-       Range{0x212f, 0x2139, 1},
-       Range{0x213c, 0x213f, 1},
-       Range{0x2145, 0x2149, 1},
-       Range{0x214e, 0x214e, 1},
-       Range{0x2183, 0x2184, 1},
-       Range{0x2c00, 0x2c2e, 1},
-       Range{0x2c30, 0x2c5e, 1},
-       Range{0x2c60, 0x2c6f, 1},
-       Range{0x2c71, 0x2c7d, 1},
-       Range{0x2c80, 0x2ce4, 1},
-       Range{0x2d00, 0x2d25, 1},
-       Range{0x2d30, 0x2d65, 1},
-       Range{0x2d6f, 0x2d6f, 1},
-       Range{0x2d80, 0x2d96, 1},
-       Range{0x2da0, 0x2da6, 1},
-       Range{0x2da8, 0x2dae, 1},
-       Range{0x2db0, 0x2db6, 1},
-       Range{0x2db8, 0x2dbe, 1},
-       Range{0x2dc0, 0x2dc6, 1},
-       Range{0x2dc8, 0x2dce, 1},
-       Range{0x2dd0, 0x2dd6, 1},
-       Range{0x2dd8, 0x2dde, 1},
-       Range{0x2e2f, 0x2e2f, 1},
-       Range{0x3005, 0x3006, 1},
-       Range{0x3031, 0x3035, 1},
-       Range{0x303b, 0x303c, 1},
-       Range{0x3041, 0x3096, 1},
-       Range{0x309d, 0x309f, 1},
-       Range{0x30a1, 0x30fa, 1},
-       Range{0x30fc, 0x30ff, 1},
-       Range{0x3105, 0x312d, 1},
-       Range{0x3131, 0x318e, 1},
-       Range{0x31a0, 0x31b7, 1},
-       Range{0x31f0, 0x31ff, 1},
-       Range{0x3400, 0x4db5, 1},
-       Range{0x4e00, 0x9fc3, 1},
-       Range{0xa000, 0xa48c, 1},
-       Range{0xa500, 0xa60c, 1},
-       Range{0xa610, 0xa61f, 1},
-       Range{0xa62a, 0xa62b, 1},
-       Range{0xa640, 0xa65f, 1},
-       Range{0xa662, 0xa66e, 1},
-       Range{0xa67f, 0xa697, 1},
-       Range{0xa717, 0xa71f, 1},
-       Range{0xa722, 0xa788, 1},
-       Range{0xa78b, 0xa78c, 1},
-       Range{0xa7fb, 0xa801, 1},
-       Range{0xa803, 0xa805, 1},
-       Range{0xa807, 0xa80a, 1},
-       Range{0xa80c, 0xa822, 1},
-       Range{0xa840, 0xa873, 1},
-       Range{0xa882, 0xa8b3, 1},
-       Range{0xa90a, 0xa925, 1},
-       Range{0xa930, 0xa946, 1},
-       Range{0xaa00, 0xaa28, 1},
-       Range{0xaa40, 0xaa42, 1},
-       Range{0xaa44, 0xaa4b, 1},
-       Range{0xac00, 0xd7a3, 1},
-       Range{0xf900, 0xfa2d, 1},
-       Range{0xfa30, 0xfa6a, 1},
-       Range{0xfa70, 0xfad9, 1},
-       Range{0xfb00, 0xfb06, 1},
-       Range{0xfb13, 0xfb17, 1},
-       Range{0xfb1d, 0xfb1d, 1},
-       Range{0xfb1f, 0xfb28, 1},
-       Range{0xfb2a, 0xfb36, 1},
-       Range{0xfb38, 0xfb3c, 1},
-       Range{0xfb3e, 0xfb3e, 1},
-       Range{0xfb40, 0xfb41, 1},
-       Range{0xfb43, 0xfb44, 1},
-       Range{0xfb46, 0xfbb1, 1},
-       Range{0xfbd3, 0xfd3d, 1},
-       Range{0xfd50, 0xfd8f, 1},
-       Range{0xfd92, 0xfdc7, 1},
-       Range{0xfdf0, 0xfdfb, 1},
-       Range{0xfe70, 0xfe74, 1},
-       Range{0xfe76, 0xfefc, 1},
-       Range{0xff21, 0xff3a, 1},
-       Range{0xff41, 0xff5a, 1},
-       Range{0xff66, 0xffbe, 1},
-       Range{0xffc2, 0xffc7, 1},
-       Range{0xffca, 0xffcf, 1},
-       Range{0xffd2, 0xffd7, 1},
-       Range{0xffda, 0xffdc, 1},
-       Range{0x10000, 0x1000b, 1},
-       Range{0x1000d, 0x10026, 1},
-       Range{0x10028, 0x1003a, 1},
-       Range{0x1003c, 0x1003d, 1},
-       Range{0x1003f, 0x1004d, 1},
-       Range{0x10050, 0x1005d, 1},
-       Range{0x10080, 0x100fa, 1},
-       Range{0x10280, 0x1029c, 1},
-       Range{0x102a0, 0x102d0, 1},
-       Range{0x10300, 0x1031e, 1},
-       Range{0x10330, 0x10340, 1},
-       Range{0x10342, 0x10349, 1},
-       Range{0x10380, 0x1039d, 1},
-       Range{0x103a0, 0x103c3, 1},
-       Range{0x103c8, 0x103cf, 1},
-       Range{0x10400, 0x1049d, 1},
-       Range{0x10800, 0x10805, 1},
-       Range{0x10808, 0x10808, 1},
-       Range{0x1080a, 0x10835, 1},
-       Range{0x10837, 0x10838, 1},
-       Range{0x1083c, 0x1083f, 3},
-       Range{0x10900, 0x10915, 1},
-       Range{0x10920, 0x10939, 1},
-       Range{0x10a00, 0x10a00, 1},
-       Range{0x10a10, 0x10a13, 1},
-       Range{0x10a15, 0x10a17, 1},
-       Range{0x10a19, 0x10a33, 1},
-       Range{0x12000, 0x1236e, 1},
-       Range{0x1d400, 0x1d454, 1},
-       Range{0x1d456, 0x1d49c, 1},
-       Range{0x1d49e, 0x1d49f, 1},
-       Range{0x1d4a2, 0x1d4a2, 1},
-       Range{0x1d4a5, 0x1d4a6, 1},
-       Range{0x1d4a9, 0x1d4ac, 1},
-       Range{0x1d4ae, 0x1d4b9, 1},
-       Range{0x1d4bb, 0x1d4bb, 1},
-       Range{0x1d4bd, 0x1d4c3, 1},
-       Range{0x1d4c5, 0x1d505, 1},
-       Range{0x1d507, 0x1d50a, 1},
-       Range{0x1d50d, 0x1d514, 1},
-       Range{0x1d516, 0x1d51c, 1},
-       Range{0x1d51e, 0x1d539, 1},
-       Range{0x1d53b, 0x1d53e, 1},
-       Range{0x1d540, 0x1d544, 1},
-       Range{0x1d546, 0x1d546, 1},
-       Range{0x1d54a, 0x1d550, 1},
-       Range{0x1d552, 0x1d6a5, 1},
-       Range{0x1d6a8, 0x1d6c0, 1},
-       Range{0x1d6c2, 0x1d6da, 1},
-       Range{0x1d6dc, 0x1d6fa, 1},
-       Range{0x1d6fc, 0x1d714, 1},
-       Range{0x1d716, 0x1d734, 1},
-       Range{0x1d736, 0x1d74e, 1},
-       Range{0x1d750, 0x1d76e, 1},
-       Range{0x1d770, 0x1d788, 1},
-       Range{0x1d78a, 0x1d7a8, 1},
-       Range{0x1d7aa, 0x1d7c2, 1},
-       Range{0x1d7c4, 0x1d7cb, 1},
-       Range{0x20000, 0x2a6d6, 1},
-       Range{0x2f800, 0x2fa1d, 1},
-}
-
 // Is tests whether rune is in the specified table of ranges.
 func Is(ranges []Range, rune int) bool {
        // common case: rune is ASCII or Latin-1
@@ -566,13 +49,22 @@ func Is(ranges []Range, rune int) bool {
        return false;
 }
 
-// IsLetter reports whether the rune is an upper case letter.
+// IsUpper reports whether the rune is an upper case letter.
 func IsUpper(rune int) bool {
        return Is(Upper, rune);
 }
 
+// IsLower reports whether the rune is a lower case letter.
+func IsLower(rune int) bool {
+       return Is(Lower, rune);
+}
+
+// IsTitle reports whether the rune is a title case letter.
+func IsTitle(rune int) bool {
+       return Is(Title, rune);
+}
+
 // IsLetter reports whether the rune is a letter.
 func IsLetter(rune int) bool {
        return Is(Letter, rune);
 }
-
index 15a0a0eed6cd7b5413ad49c3a04ac8837f0aa7ab..357fb7e24124c200e00192ca157600afdb1c0373 100644 (file)
@@ -6,7 +6,7 @@ package unicode
 
 import "testing"
 
-var upper = []int{
+var upperTest = []int{
        0x41,
        0xc0,
        0xd8,
@@ -30,7 +30,7 @@ var upper = []int{
        0x1d7ca,
 }
 
-var notupper = []int{
+var notupperTest = []int{
        0x40,
        0x5b,
        0x61,
@@ -43,7 +43,7 @@ var notupper = []int{
        0x10000,
 }
 
-var letter = []int{
+var letterTest = []int{
        0x41,
        0x61,
        0xaa,
@@ -78,7 +78,7 @@ var letter = []int{
        0x2fa1d,
 }
 
-var notletter = []int{
+var notletterTest = []int{
        0x20,
        0x35,
        0x375,
@@ -90,17 +90,17 @@ var notletter = []int{
 }
 
 func TestIsLetter(t *testing.T) {
-       for i, r := range upper {
+       for i, r := range upperTest {
                if !IsLetter(r) {
                        t.Errorf("IsLetter(%#x) = false, want true\n", r);
                }
        }
-       for i, r := range letter {
+       for i, r := range letterTest {
                if !IsLetter(r) {
                        t.Errorf("IsLetter(%#x) = false, want true\n", r);
                }
        }
-       for i, r := range notletter {
+       for i, r := range notletterTest {
                if IsLetter(r) {
                        t.Errorf("IsLetter(%#x) = true, want false\n", r);
                }
@@ -108,17 +108,17 @@ func TestIsLetter(t *testing.T) {
 }
 
 func TestIsUpper(t *testing.T) {
-       for i, r := range upper {
+       for i, r := range upperTest {
                if !IsUpper(r) {
                        t.Errorf("IsUpper(%#x) = false, want true\n", r);
                }
        }
-       for i, r := range notupper {
+       for i, r := range notupperTest {
                if IsUpper(r) {
                        t.Errorf("IsUpper(%#x) = true, want false\n", r);
                }
        }
-       for i, r := range notletter {
+       for i, r := range notletterTest {
                if IsUpper(r) {
                        t.Errorf("IsUpper(%#x) = true, want false\n", r);
                }
diff --git a/src/pkg/unicode/lettertables.go b/src/pkg/unicode/lettertables.go
new file mode 100644 (file)
index 0000000..663fc37
--- /dev/null
@@ -0,0 +1,664 @@
+// Generated by running
+//     tables --digits=false --url=http://www.unicode.org/Public/5.1.0/ucd/UnicodeData.txt
+// DO NOT EDIT
+
+package unicode
+
+// Letter is the set of Unicode letters.
+var Letter = letter
+var letter = []Range {
+       Range{0x0041, 0x005a, 1},
+       Range{0x0061, 0x007a, 1},
+       Range{0x00aa, 0x00b5, 11},
+       Range{0x00ba, 0x00c0, 6},
+       Range{0x00c1, 0x00d6, 1},
+       Range{0x00d8, 0x00f6, 1},
+       Range{0x00f8, 0x02c1, 1},
+       Range{0x02c6, 0x02d1, 1},
+       Range{0x02e0, 0x02e4, 1},
+       Range{0x02ec, 0x02ee, 2},
+       Range{0x0370, 0x0374, 1},
+       Range{0x0376, 0x0377, 1},
+       Range{0x037a, 0x037d, 1},
+       Range{0x0386, 0x0388, 2},
+       Range{0x0389, 0x038a, 1},
+       Range{0x038c, 0x038e, 2},
+       Range{0x038f, 0x03a1, 1},
+       Range{0x03a3, 0x03f5, 1},
+       Range{0x03f7, 0x0481, 1},
+       Range{0x048a, 0x0523, 1},
+       Range{0x0531, 0x0556, 1},
+       Range{0x0559, 0x0561, 8},
+       Range{0x0562, 0x0587, 1},
+       Range{0x05d0, 0x05ea, 1},
+       Range{0x05f0, 0x05f2, 1},
+       Range{0x0621, 0x064a, 1},
+       Range{0x066e, 0x066f, 1},
+       Range{0x0671, 0x06d3, 1},
+       Range{0x06d5, 0x06e5, 16},
+       Range{0x06e6, 0x06ee, 8},
+       Range{0x06ef, 0x06fa, 11},
+       Range{0x06fb, 0x06fc, 1},
+       Range{0x06ff, 0x0710, 17},
+       Range{0x0712, 0x072f, 1},
+       Range{0x074d, 0x07a5, 1},
+       Range{0x07b1, 0x07ca, 25},
+       Range{0x07cb, 0x07ea, 1},
+       Range{0x07f4, 0x07f5, 1},
+       Range{0x07fa, 0x0904, 266},
+       Range{0x0905, 0x0939, 1},
+       Range{0x093d, 0x0950, 19},
+       Range{0x0958, 0x0961, 1},
+       Range{0x0971, 0x0972, 1},
+       Range{0x097b, 0x097f, 1},
+       Range{0x0985, 0x098c, 1},
+       Range{0x098f, 0x0990, 1},
+       Range{0x0993, 0x09a8, 1},
+       Range{0x09aa, 0x09b0, 1},
+       Range{0x09b2, 0x09b6, 4},
+       Range{0x09b7, 0x09b9, 1},
+       Range{0x09bd, 0x09ce, 17},
+       Range{0x09dc, 0x09dd, 1},
+       Range{0x09df, 0x09e1, 1},
+       Range{0x09f0, 0x09f1, 1},
+       Range{0x0a05, 0x0a0a, 1},
+       Range{0x0a0f, 0x0a10, 1},
+       Range{0x0a13, 0x0a28, 1},
+       Range{0x0a2a, 0x0a30, 1},
+       Range{0x0a32, 0x0a33, 1},
+       Range{0x0a35, 0x0a36, 1},
+       Range{0x0a38, 0x0a39, 1},
+       Range{0x0a59, 0x0a5c, 1},
+       Range{0x0a5e, 0x0a72, 20},
+       Range{0x0a73, 0x0a74, 1},
+       Range{0x0a85, 0x0a8d, 1},
+       Range{0x0a8f, 0x0a91, 1},
+       Range{0x0a93, 0x0aa8, 1},
+       Range{0x0aaa, 0x0ab0, 1},
+       Range{0x0ab2, 0x0ab3, 1},
+       Range{0x0ab5, 0x0ab9, 1},
+       Range{0x0abd, 0x0ad0, 19},
+       Range{0x0ae0, 0x0ae1, 1},
+       Range{0x0b05, 0x0b0c, 1},
+       Range{0x0b0f, 0x0b10, 1},
+       Range{0x0b13, 0x0b28, 1},
+       Range{0x0b2a, 0x0b30, 1},
+       Range{0x0b32, 0x0b33, 1},
+       Range{0x0b35, 0x0b39, 1},
+       Range{0x0b3d, 0x0b5c, 31},
+       Range{0x0b5d, 0x0b5f, 2},
+       Range{0x0b60, 0x0b61, 1},
+       Range{0x0b71, 0x0b83, 18},
+       Range{0x0b85, 0x0b8a, 1},
+       Range{0x0b8e, 0x0b90, 1},
+       Range{0x0b92, 0x0b95, 1},
+       Range{0x0b99, 0x0b9a, 1},
+       Range{0x0b9c, 0x0b9e, 2},
+       Range{0x0b9f, 0x0ba3, 4},
+       Range{0x0ba4, 0x0ba8, 4},
+       Range{0x0ba9, 0x0baa, 1},
+       Range{0x0bae, 0x0bb9, 1},
+       Range{0x0bd0, 0x0c05, 53},
+       Range{0x0c06, 0x0c0c, 1},
+       Range{0x0c0e, 0x0c10, 1},
+       Range{0x0c12, 0x0c28, 1},
+       Range{0x0c2a, 0x0c33, 1},
+       Range{0x0c35, 0x0c39, 1},
+       Range{0x0c3d, 0x0c58, 27},
+       Range{0x0c59, 0x0c60, 7},
+       Range{0x0c61, 0x0c85, 36},
+       Range{0x0c86, 0x0c8c, 1},
+       Range{0x0c8e, 0x0c90, 1},
+       Range{0x0c92, 0x0ca8, 1},
+       Range{0x0caa, 0x0cb3, 1},
+       Range{0x0cb5, 0x0cb9, 1},
+       Range{0x0cbd, 0x0cde, 33},
+       Range{0x0ce0, 0x0ce1, 1},
+       Range{0x0d05, 0x0d0c, 1},
+       Range{0x0d0e, 0x0d10, 1},
+       Range{0x0d12, 0x0d28, 1},
+       Range{0x0d2a, 0x0d39, 1},
+       Range{0x0d3d, 0x0d60, 35},
+       Range{0x0d61, 0x0d7a, 25},
+       Range{0x0d7b, 0x0d7f, 1},
+       Range{0x0d85, 0x0d96, 1},
+       Range{0x0d9a, 0x0db1, 1},
+       Range{0x0db3, 0x0dbb, 1},
+       Range{0x0dbd, 0x0dc0, 3},
+       Range{0x0dc1, 0x0dc6, 1},
+       Range{0x0e01, 0x0e30, 1},
+       Range{0x0e32, 0x0e33, 1},
+       Range{0x0e40, 0x0e46, 1},
+       Range{0x0e81, 0x0e82, 1},
+       Range{0x0e84, 0x0e87, 3},
+       Range{0x0e88, 0x0e8a, 2},
+       Range{0x0e8d, 0x0e94, 7},
+       Range{0x0e95, 0x0e97, 1},
+       Range{0x0e99, 0x0e9f, 1},
+       Range{0x0ea1, 0x0ea3, 1},
+       Range{0x0ea5, 0x0ea7, 2},
+       Range{0x0eaa, 0x0eab, 1},
+       Range{0x0ead, 0x0eb0, 1},
+       Range{0x0eb2, 0x0eb3, 1},
+       Range{0x0ebd, 0x0ec0, 3},
+       Range{0x0ec1, 0x0ec4, 1},
+       Range{0x0ec6, 0x0edc, 22},
+       Range{0x0edd, 0x0f00, 35},
+       Range{0x0f40, 0x0f47, 1},
+       Range{0x0f49, 0x0f6c, 1},
+       Range{0x0f88, 0x0f8b, 1},
+       Range{0x1000, 0x102a, 1},
+       Range{0x103f, 0x1050, 17},
+       Range{0x1051, 0x1055, 1},
+       Range{0x105a, 0x105d, 1},
+       Range{0x1061, 0x1065, 4},
+       Range{0x1066, 0x106e, 8},
+       Range{0x106f, 0x1070, 1},
+       Range{0x1075, 0x1081, 1},
+       Range{0x108e, 0x10a0, 18},
+       Range{0x10a1, 0x10c5, 1},
+       Range{0x10d0, 0x10fa, 1},
+       Range{0x10fc, 0x1100, 4},
+       Range{0x1101, 0x1159, 1},
+       Range{0x115f, 0x11a2, 1},
+       Range{0x11a8, 0x11f9, 1},
+       Range{0x1200, 0x1248, 1},
+       Range{0x124a, 0x124d, 1},
+       Range{0x1250, 0x1256, 1},
+       Range{0x1258, 0x125a, 2},
+       Range{0x125b, 0x125d, 1},
+       Range{0x1260, 0x1288, 1},
+       Range{0x128a, 0x128d, 1},
+       Range{0x1290, 0x12b0, 1},
+       Range{0x12b2, 0x12b5, 1},
+       Range{0x12b8, 0x12be, 1},
+       Range{0x12c0, 0x12c2, 2},
+       Range{0x12c3, 0x12c5, 1},
+       Range{0x12c8, 0x12d6, 1},
+       Range{0x12d8, 0x1310, 1},
+       Range{0x1312, 0x1315, 1},
+       Range{0x1318, 0x135a, 1},
+       Range{0x1380, 0x138f, 1},
+       Range{0x13a0, 0x13f4, 1},
+       Range{0x1401, 0x166c, 1},
+       Range{0x166f, 0x1676, 1},
+       Range{0x1681, 0x169a, 1},
+       Range{0x16a0, 0x16ea, 1},
+       Range{0x1700, 0x170c, 1},
+       Range{0x170e, 0x1711, 1},
+       Range{0x1720, 0x1731, 1},
+       Range{0x1740, 0x1751, 1},
+       Range{0x1760, 0x176c, 1},
+       Range{0x176e, 0x1770, 1},
+       Range{0x1780, 0x17b3, 1},
+       Range{0x17d7, 0x17dc, 5},
+       Range{0x1820, 0x1877, 1},
+       Range{0x1880, 0x18a8, 1},
+       Range{0x18aa, 0x1900, 86},
+       Range{0x1901, 0x191c, 1},
+       Range{0x1950, 0x196d, 1},
+       Range{0x1970, 0x1974, 1},
+       Range{0x1980, 0x19a9, 1},
+       Range{0x19c1, 0x19c7, 1},
+       Range{0x1a00, 0x1a16, 1},
+       Range{0x1b05, 0x1b33, 1},
+       Range{0x1b45, 0x1b4b, 1},
+       Range{0x1b83, 0x1ba0, 1},
+       Range{0x1bae, 0x1baf, 1},
+       Range{0x1c00, 0x1c23, 1},
+       Range{0x1c4d, 0x1c4f, 1},
+       Range{0x1c5a, 0x1c7d, 1},
+       Range{0x1d00, 0x1dbf, 1},
+       Range{0x1e00, 0x1f15, 1},
+       Range{0x1f18, 0x1f1d, 1},
+       Range{0x1f20, 0x1f45, 1},
+       Range{0x1f48, 0x1f4d, 1},
+       Range{0x1f50, 0x1f57, 1},
+       Range{0x1f59, 0x1f5f, 2},
+       Range{0x1f60, 0x1f7d, 1},
+       Range{0x1f80, 0x1fb4, 1},
+       Range{0x1fb6, 0x1fbc, 1},
+       Range{0x1fbe, 0x1fc2, 4},
+       Range{0x1fc3, 0x1fc4, 1},
+       Range{0x1fc6, 0x1fcc, 1},
+       Range{0x1fd0, 0x1fd3, 1},
+       Range{0x1fd6, 0x1fdb, 1},
+       Range{0x1fe0, 0x1fec, 1},
+       Range{0x1ff2, 0x1ff4, 1},
+       Range{0x1ff6, 0x1ffc, 1},
+       Range{0x2071, 0x207f, 14},
+       Range{0x2090, 0x2094, 1},
+       Range{0x2102, 0x2107, 5},
+       Range{0x210a, 0x2113, 1},
+       Range{0x2115, 0x2119, 4},
+       Range{0x211a, 0x211d, 1},
+       Range{0x2124, 0x212a, 2},
+       Range{0x212b, 0x212d, 1},
+       Range{0x212f, 0x2139, 1},
+       Range{0x213c, 0x213f, 1},
+       Range{0x2145, 0x2149, 1},
+       Range{0x214e, 0x2183, 53},
+       Range{0x2184, 0x2c00, 2684},
+       Range{0x2c01, 0x2c2e, 1},
+       Range{0x2c30, 0x2c5e, 1},
+       Range{0x2c60, 0x2c6f, 1},
+       Range{0x2c71, 0x2c7d, 1},
+       Range{0x2c80, 0x2ce4, 1},
+       Range{0x2d00, 0x2d25, 1},
+       Range{0x2d30, 0x2d65, 1},
+       Range{0x2d6f, 0x2d80, 17},
+       Range{0x2d81, 0x2d96, 1},
+       Range{0x2da0, 0x2da6, 1},
+       Range{0x2da8, 0x2dae, 1},
+       Range{0x2db0, 0x2db6, 1},
+       Range{0x2db8, 0x2dbe, 1},
+       Range{0x2dc0, 0x2dc6, 1},
+       Range{0x2dc8, 0x2dce, 1},
+       Range{0x2dd0, 0x2dd6, 1},
+       Range{0x2dd8, 0x2dde, 1},
+       Range{0x2e2f, 0x3005, 470},
+       Range{0x3006, 0x3031, 43},
+       Range{0x3032, 0x3035, 1},
+       Range{0x303b, 0x303c, 1},
+       Range{0x3041, 0x3096, 1},
+       Range{0x309d, 0x309f, 1},
+       Range{0x30a1, 0x30fa, 1},
+       Range{0x30fc, 0x30ff, 1},
+       Range{0x3105, 0x312d, 1},
+       Range{0x3131, 0x318e, 1},
+       Range{0x31a0, 0x31b7, 1},
+       Range{0x31f0, 0x31ff, 1},
+       Range{0x3400, 0x4db5, 6581},
+       Range{0x4e00, 0x9fc3, 20931},
+       Range{0xa000, 0xa48c, 1},
+       Range{0xa500, 0xa60c, 1},
+       Range{0xa610, 0xa61f, 1},
+       Range{0xa62a, 0xa62b, 1},
+       Range{0xa640, 0xa65f, 1},
+       Range{0xa662, 0xa66e, 1},
+       Range{0xa67f, 0xa697, 1},
+       Range{0xa717, 0xa71f, 1},
+       Range{0xa722, 0xa788, 1},
+       Range{0xa78b, 0xa78c, 1},
+       Range{0xa7fb, 0xa801, 1},
+       Range{0xa803, 0xa805, 1},
+       Range{0xa807, 0xa80a, 1},
+       Range{0xa80c, 0xa822, 1},
+       Range{0xa840, 0xa873, 1},
+       Range{0xa882, 0xa8b3, 1},
+       Range{0xa90a, 0xa925, 1},
+       Range{0xa930, 0xa946, 1},
+       Range{0xaa00, 0xaa28, 1},
+       Range{0xaa40, 0xaa42, 1},
+       Range{0xaa44, 0xaa4b, 1},
+       Range{0xac00, 0xd7a3, 11171},
+       Range{0xf900, 0xfa2d, 1},
+       Range{0xfa30, 0xfa6a, 1},
+       Range{0xfa70, 0xfad9, 1},
+       Range{0xfb00, 0xfb06, 1},
+       Range{0xfb13, 0xfb17, 1},
+       Range{0xfb1d, 0xfb1f, 2},
+       Range{0xfb20, 0xfb28, 1},
+       Range{0xfb2a, 0xfb36, 1},
+       Range{0xfb38, 0xfb3c, 1},
+       Range{0xfb3e, 0xfb40, 2},
+       Range{0xfb41, 0xfb43, 2},
+       Range{0xfb44, 0xfb46, 2},
+       Range{0xfb47, 0xfbb1, 1},
+       Range{0xfbd3, 0xfd3d, 1},
+       Range{0xfd50, 0xfd8f, 1},
+       Range{0xfd92, 0xfdc7, 1},
+       Range{0xfdf0, 0xfdfb, 1},
+       Range{0xfe70, 0xfe74, 1},
+       Range{0xfe76, 0xfefc, 1},
+       Range{0xff21, 0xff3a, 1},
+       Range{0xff41, 0xff5a, 1},
+       Range{0xff66, 0xffbe, 1},
+       Range{0xffc2, 0xffc7, 1},
+       Range{0xffca, 0xffcf, 1},
+       Range{0xffd2, 0xffd7, 1},
+       Range{0xffda, 0xffdc, 1},
+       Range{0x10000, 0x1000b, 1},
+       Range{0x1000d, 0x10026, 1},
+       Range{0x10028, 0x1003a, 1},
+       Range{0x1003c, 0x1003d, 1},
+       Range{0x1003f, 0x1004d, 1},
+       Range{0x10050, 0x1005d, 1},
+       Range{0x10080, 0x100fa, 1},
+       Range{0x10280, 0x1029c, 1},
+       Range{0x102a0, 0x102d0, 1},
+       Range{0x10300, 0x1031e, 1},
+       Range{0x10330, 0x10340, 1},
+       Range{0x10342, 0x10349, 1},
+       Range{0x10380, 0x1039d, 1},
+       Range{0x103a0, 0x103c3, 1},
+       Range{0x103c8, 0x103cf, 1},
+       Range{0x10400, 0x1049d, 1},
+       Range{0x10800, 0x10805, 1},
+       Range{0x10808, 0x1080a, 2},
+       Range{0x1080b, 0x10835, 1},
+       Range{0x10837, 0x10838, 1},
+       Range{0x1083c, 0x1083f, 3},
+       Range{0x10900, 0x10915, 1},
+       Range{0x10920, 0x10939, 1},
+       Range{0x10a00, 0x10a10, 16},
+       Range{0x10a11, 0x10a13, 1},
+       Range{0x10a15, 0x10a17, 1},
+       Range{0x10a19, 0x10a33, 1},
+       Range{0x12000, 0x1236e, 1},
+       Range{0x1d400, 0x1d454, 1},
+       Range{0x1d456, 0x1d49c, 1},
+       Range{0x1d49e, 0x1d49f, 1},
+       Range{0x1d4a2, 0x1d4a5, 3},
+       Range{0x1d4a6, 0x1d4a9, 3},
+       Range{0x1d4aa, 0x1d4ac, 1},
+       Range{0x1d4ae, 0x1d4b9, 1},
+       Range{0x1d4bb, 0x1d4bd, 2},
+       Range{0x1d4be, 0x1d4c3, 1},
+       Range{0x1d4c5, 0x1d505, 1},
+       Range{0x1d507, 0x1d50a, 1},
+       Range{0x1d50d, 0x1d514, 1},
+       Range{0x1d516, 0x1d51c, 1},
+       Range{0x1d51e, 0x1d539, 1},
+       Range{0x1d53b, 0x1d53e, 1},
+       Range{0x1d540, 0x1d544, 1},
+       Range{0x1d546, 0x1d54a, 4},
+       Range{0x1d54b, 0x1d550, 1},
+       Range{0x1d552, 0x1d6a5, 1},
+       Range{0x1d6a8, 0x1d6c0, 1},
+       Range{0x1d6c2, 0x1d6da, 1},
+       Range{0x1d6dc, 0x1d6fa, 1},
+       Range{0x1d6fc, 0x1d714, 1},
+       Range{0x1d716, 0x1d734, 1},
+       Range{0x1d736, 0x1d74e, 1},
+       Range{0x1d750, 0x1d76e, 1},
+       Range{0x1d770, 0x1d788, 1},
+       Range{0x1d78a, 0x1d7a8, 1},
+       Range{0x1d7aa, 0x1d7c2, 1},
+       Range{0x1d7c4, 0x1d7cb, 1},
+       Range{0x20000, 0x2a6d6, 42710},
+       Range{0x2f800, 0x2fa1d, 1},
+}
+
+// Upper is the set of Unicode upper case letters.
+var Upper = upper
+var upper = []Range {
+       Range{0x0041, 0x005a, 1},
+       Range{0x00c0, 0x00d6, 1},
+       Range{0x00d8, 0x00de, 1},
+       Range{0x0100, 0x0136, 2},
+       Range{0x0139, 0x0147, 2},
+       Range{0x014a, 0x0178, 2},
+       Range{0x0179, 0x017d, 2},
+       Range{0x0181, 0x0182, 1},
+       Range{0x0184, 0x0186, 2},
+       Range{0x0187, 0x0189, 2},
+       Range{0x018a, 0x018b, 1},
+       Range{0x018e, 0x0191, 1},
+       Range{0x0193, 0x0194, 1},
+       Range{0x0196, 0x0198, 1},
+       Range{0x019c, 0x019d, 1},
+       Range{0x019f, 0x01a0, 1},
+       Range{0x01a2, 0x01a6, 2},
+       Range{0x01a7, 0x01a9, 2},
+       Range{0x01ac, 0x01ae, 2},
+       Range{0x01af, 0x01b1, 2},
+       Range{0x01b2, 0x01b3, 1},
+       Range{0x01b5, 0x01b7, 2},
+       Range{0x01b8, 0x01bc, 4},
+       Range{0x01c4, 0x01cd, 3},
+       Range{0x01cf, 0x01db, 2},
+       Range{0x01de, 0x01ee, 2},
+       Range{0x01f1, 0x01f4, 3},
+       Range{0x01f6, 0x01f8, 1},
+       Range{0x01fa, 0x0232, 2},
+       Range{0x023a, 0x023b, 1},
+       Range{0x023d, 0x023e, 1},
+       Range{0x0241, 0x0243, 2},
+       Range{0x0244, 0x0246, 1},
+       Range{0x0248, 0x024e, 2},
+       Range{0x0370, 0x0372, 2},
+       Range{0x0376, 0x0386, 16},
+       Range{0x0388, 0x038a, 1},
+       Range{0x038c, 0x038e, 2},
+       Range{0x038f, 0x0391, 2},
+       Range{0x0392, 0x03a1, 1},
+       Range{0x03a3, 0x03ab, 1},
+       Range{0x03cf, 0x03d2, 3},
+       Range{0x03d3, 0x03d4, 1},
+       Range{0x03d8, 0x03ee, 2},
+       Range{0x03f4, 0x03f7, 3},
+       Range{0x03f9, 0x03fa, 1},
+       Range{0x03fd, 0x042f, 1},
+       Range{0x0460, 0x0480, 2},
+       Range{0x048a, 0x04c0, 2},
+       Range{0x04c1, 0x04cd, 2},
+       Range{0x04d0, 0x0522, 2},
+       Range{0x0531, 0x0556, 1},
+       Range{0x10a0, 0x10c5, 1},
+       Range{0x1e00, 0x1e94, 2},
+       Range{0x1e9e, 0x1efe, 2},
+       Range{0x1f08, 0x1f0f, 1},
+       Range{0x1f18, 0x1f1d, 1},
+       Range{0x1f28, 0x1f2f, 1},
+       Range{0x1f38, 0x1f3f, 1},
+       Range{0x1f48, 0x1f4d, 1},
+       Range{0x1f59, 0x1f5f, 2},
+       Range{0x1f68, 0x1f6f, 1},
+       Range{0x1fb8, 0x1fbb, 1},
+       Range{0x1fc8, 0x1fcb, 1},
+       Range{0x1fd8, 0x1fdb, 1},
+       Range{0x1fe8, 0x1fec, 1},
+       Range{0x1ff8, 0x1ffb, 1},
+       Range{0x2102, 0x2107, 5},
+       Range{0x210b, 0x210d, 1},
+       Range{0x2110, 0x2112, 1},
+       Range{0x2115, 0x2119, 4},
+       Range{0x211a, 0x211d, 1},
+       Range{0x2124, 0x212a, 2},
+       Range{0x212b, 0x212d, 1},
+       Range{0x2130, 0x2133, 1},
+       Range{0x213e, 0x213f, 1},
+       Range{0x2145, 0x2183, 62},
+       Range{0x2c00, 0x2c2e, 1},
+       Range{0x2c60, 0x2c62, 2},
+       Range{0x2c63, 0x2c64, 1},
+       Range{0x2c67, 0x2c6d, 2},
+       Range{0x2c6e, 0x2c6f, 1},
+       Range{0x2c72, 0x2c75, 3},
+       Range{0x2c80, 0x2ce2, 2},
+       Range{0xa640, 0xa65e, 2},
+       Range{0xa662, 0xa66c, 2},
+       Range{0xa680, 0xa696, 2},
+       Range{0xa722, 0xa72e, 2},
+       Range{0xa732, 0xa76e, 2},
+       Range{0xa779, 0xa77d, 2},
+       Range{0xa77e, 0xa786, 2},
+       Range{0xa78b, 0xff21, 22422},
+       Range{0xff22, 0xff3a, 1},
+       Range{0x10400, 0x10427, 1},
+       Range{0x1d400, 0x1d419, 1},
+       Range{0x1d434, 0x1d44d, 1},
+       Range{0x1d468, 0x1d481, 1},
+       Range{0x1d49c, 0x1d49e, 2},
+       Range{0x1d49f, 0x1d4a5, 3},
+       Range{0x1d4a6, 0x1d4a9, 3},
+       Range{0x1d4aa, 0x1d4ac, 1},
+       Range{0x1d4ae, 0x1d4b5, 1},
+       Range{0x1d4d0, 0x1d4e9, 1},
+       Range{0x1d504, 0x1d505, 1},
+       Range{0x1d507, 0x1d50a, 1},
+       Range{0x1d50d, 0x1d514, 1},
+       Range{0x1d516, 0x1d51c, 1},
+       Range{0x1d538, 0x1d539, 1},
+       Range{0x1d53b, 0x1d53e, 1},
+       Range{0x1d540, 0x1d544, 1},
+       Range{0x1d546, 0x1d54a, 4},
+       Range{0x1d54b, 0x1d550, 1},
+       Range{0x1d56c, 0x1d585, 1},
+       Range{0x1d5a0, 0x1d5b9, 1},
+       Range{0x1d5d4, 0x1d5ed, 1},
+       Range{0x1d608, 0x1d621, 1},
+       Range{0x1d63c, 0x1d655, 1},
+       Range{0x1d670, 0x1d689, 1},
+       Range{0x1d6a8, 0x1d6c0, 1},
+       Range{0x1d6e2, 0x1d6fa, 1},
+       Range{0x1d71c, 0x1d734, 1},
+       Range{0x1d756, 0x1d76e, 1},
+       Range{0x1d790, 0x1d7a8, 1},
+       Range{0x1d7ca, 0x1d7ca, 1},
+}
+
+// Lower is the set of Unicode lower case letters.
+var Lower = lower
+var lower = []Range {
+       Range{0x0061, 0x007a, 1},
+       Range{0x00aa, 0x00b5, 11},
+       Range{0x00ba, 0x00df, 37},
+       Range{0x00e0, 0x00f6, 1},
+       Range{0x00f8, 0x00ff, 1},
+       Range{0x0101, 0x0137, 2},
+       Range{0x0138, 0x0148, 2},
+       Range{0x0149, 0x0177, 2},
+       Range{0x017a, 0x017e, 2},
+       Range{0x017f, 0x0180, 1},
+       Range{0x0183, 0x0185, 2},
+       Range{0x0188, 0x018c, 4},
+       Range{0x018d, 0x0192, 5},
+       Range{0x0195, 0x0199, 4},
+       Range{0x019a, 0x019b, 1},
+       Range{0x019e, 0x01a1, 3},
+       Range{0x01a3, 0x01a5, 2},
+       Range{0x01a8, 0x01aa, 2},
+       Range{0x01ab, 0x01ad, 2},
+       Range{0x01b0, 0x01b4, 4},
+       Range{0x01b6, 0x01b9, 3},
+       Range{0x01ba, 0x01bd, 3},
+       Range{0x01be, 0x01bf, 1},
+       Range{0x01c6, 0x01cc, 3},
+       Range{0x01ce, 0x01dc, 2},
+       Range{0x01dd, 0x01ef, 2},
+       Range{0x01f0, 0x01f3, 3},
+       Range{0x01f5, 0x01f9, 4},
+       Range{0x01fb, 0x0233, 2},
+       Range{0x0234, 0x0239, 1},
+       Range{0x023c, 0x023f, 3},
+       Range{0x0240, 0x0242, 2},
+       Range{0x0247, 0x024f, 2},
+       Range{0x0250, 0x0293, 1},
+       Range{0x0295, 0x02af, 1},
+       Range{0x0371, 0x0373, 2},
+       Range{0x0377, 0x037b, 4},
+       Range{0x037c, 0x037d, 1},
+       Range{0x0390, 0x03ac, 28},
+       Range{0x03ad, 0x03ce, 1},
+       Range{0x03d0, 0x03d1, 1},
+       Range{0x03d5, 0x03d7, 1},
+       Range{0x03d9, 0x03ef, 2},
+       Range{0x03f0, 0x03f3, 1},
+       Range{0x03f5, 0x03fb, 3},
+       Range{0x03fc, 0x0430, 52},
+       Range{0x0431, 0x045f, 1},
+       Range{0x0461, 0x0481, 2},
+       Range{0x048b, 0x04bf, 2},
+       Range{0x04c2, 0x04ce, 2},
+       Range{0x04cf, 0x0523, 2},
+       Range{0x0561, 0x0587, 1},
+       Range{0x1d00, 0x1d2b, 1},
+       Range{0x1d62, 0x1d77, 1},
+       Range{0x1d79, 0x1d9a, 1},
+       Range{0x1e01, 0x1e95, 2},
+       Range{0x1e96, 0x1e9d, 1},
+       Range{0x1e9f, 0x1eff, 2},
+       Range{0x1f00, 0x1f07, 1},
+       Range{0x1f10, 0x1f15, 1},
+       Range{0x1f20, 0x1f27, 1},
+       Range{0x1f30, 0x1f37, 1},
+       Range{0x1f40, 0x1f45, 1},
+       Range{0x1f50, 0x1f57, 1},
+       Range{0x1f60, 0x1f67, 1},
+       Range{0x1f70, 0x1f7d, 1},
+       Range{0x1f80, 0x1f87, 1},
+       Range{0x1f90, 0x1f97, 1},
+       Range{0x1fa0, 0x1fa7, 1},
+       Range{0x1fb0, 0x1fb4, 1},
+       Range{0x1fb6, 0x1fb7, 1},
+       Range{0x1fbe, 0x1fc2, 4},
+       Range{0x1fc3, 0x1fc4, 1},
+       Range{0x1fc6, 0x1fc7, 1},
+       Range{0x1fd0, 0x1fd3, 1},
+       Range{0x1fd6, 0x1fd7, 1},
+       Range{0x1fe0, 0x1fe7, 1},
+       Range{0x1ff2, 0x1ff4, 1},
+       Range{0x1ff6, 0x1ff7, 1},
+       Range{0x2071, 0x207f, 14},
+       Range{0x210a, 0x210e, 4},
+       Range{0x210f, 0x2113, 4},
+       Range{0x212f, 0x2139, 5},
+       Range{0x213c, 0x213d, 1},
+       Range{0x2146, 0x2149, 1},
+       Range{0x214e, 0x2184, 54},
+       Range{0x2c30, 0x2c5e, 1},
+       Range{0x2c61, 0x2c65, 4},
+       Range{0x2c66, 0x2c6c, 2},
+       Range{0x2c71, 0x2c73, 2},
+       Range{0x2c74, 0x2c76, 2},
+       Range{0x2c77, 0x2c7c, 1},
+       Range{0x2c81, 0x2ce3, 2},
+       Range{0x2ce4, 0x2d00, 28},
+       Range{0x2d01, 0x2d25, 1},
+       Range{0xa641, 0xa65f, 2},
+       Range{0xa663, 0xa66d, 2},
+       Range{0xa681, 0xa697, 2},
+       Range{0xa723, 0xa72f, 2},
+       Range{0xa730, 0xa731, 1},
+       Range{0xa733, 0xa771, 2},
+       Range{0xa772, 0xa778, 1},
+       Range{0xa77a, 0xa77c, 2},
+       Range{0xa77f, 0xa787, 2},
+       Range{0xa78c, 0xfb00, 21364},
+       Range{0xfb01, 0xfb06, 1},
+       Range{0xfb13, 0xfb17, 1},
+       Range{0xff41, 0xff5a, 1},
+       Range{0x10428, 0x1044f, 1},
+       Range{0x1d41a, 0x1d433, 1},
+       Range{0x1d44e, 0x1d454, 1},
+       Range{0x1d456, 0x1d467, 1},
+       Range{0x1d482, 0x1d49b, 1},
+       Range{0x1d4b6, 0x1d4b9, 1},
+       Range{0x1d4bb, 0x1d4bd, 2},
+       Range{0x1d4be, 0x1d4c3, 1},
+       Range{0x1d4c5, 0x1d4cf, 1},
+       Range{0x1d4ea, 0x1d503, 1},
+       Range{0x1d51e, 0x1d537, 1},
+       Range{0x1d552, 0x1d56b, 1},
+       Range{0x1d586, 0x1d59f, 1},
+       Range{0x1d5ba, 0x1d5d3, 1},
+       Range{0x1d5ee, 0x1d607, 1},
+       Range{0x1d622, 0x1d63b, 1},
+       Range{0x1d656, 0x1d66f, 1},
+       Range{0x1d68a, 0x1d6a5, 1},
+       Range{0x1d6c2, 0x1d6da, 1},
+       Range{0x1d6dc, 0x1d6e1, 1},
+       Range{0x1d6fc, 0x1d714, 1},
+       Range{0x1d716, 0x1d71b, 1},
+       Range{0x1d736, 0x1d74e, 1},
+       Range{0x1d750, 0x1d755, 1},
+       Range{0x1d770, 0x1d788, 1},
+       Range{0x1d78a, 0x1d78f, 1},
+       Range{0x1d7aa, 0x1d7c2, 1},
+       Range{0x1d7c4, 0x1d7c9, 1},
+       Range{0x1d7cb, 0x1d7cb, 1},
+}
+
+// Title is the set of Unicode title case letters.
+var Title = title
+var title = []Range {
+       Range{0x01c5, 0x01cb, 3},
+       Range{0x01f2, 0x1f88, 7574},
+       Range{0x1f89, 0x1f8f, 1},
+       Range{0x1f98, 0x1f9f, 1},
+       Range{0x1fa8, 0x1faf, 1},
+       Range{0x1fbc, 0x1fcc, 16},
+       Range{0x1ffc, 0x1ffc, 1},
+}
diff --git a/src/pkg/unicode/maketables.go b/src/pkg/unicode/maketables.go
new file mode 100644 (file)
index 0000000..4668136
--- /dev/null
@@ -0,0 +1,281 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Unicode table generator.
+// Data read from the web.
+
+package main
+
+import (
+       "bufio";
+       "flag";
+       "fmt";
+       "http";
+       "log";
+       "os";
+       "strconv";
+       "strings";
+)
+
+var url = flag.String("url", "http://www.unicode.org/Public/5.1.0/ucd/UnicodeData.txt", "URL of Unicode database")
+var digits = flag.Bool("digits", false, "whether to generate digit tables; default is letter tables");
+
+var die = log.New(os.Stderr, nil, "", log.Lexit|log.Lshortfile);
+
+// Data has form:
+//     0037;DIGIT SEVEN;Nd;0;EN;;7;7;7;N;;;;;
+//     007A;LATIN SMALL LETTER Z;Ll;0;L;;;;;N;;;005A;;005A
+// See http://www.unicode.org/Public/5.1.0/ucd/UCD.html for full explanation
+// The fields
+const (
+       FCodePoint = iota;
+       FName;
+       FGeneralCategory;
+       FCanonicalCombiningClass;
+       FBidiClass;
+       FDecompositionType;
+       FDecompositionMapping;
+       FNumericType;
+       FNumericValue;
+       FBidiMirrored;
+       FUnicode1Name;
+       FISOComment;
+       FSimpleUppercaseMapping;
+       FSimpleLowercaseMapping;
+       FSimpleTitlecaseMapping;
+       NumField;
+
+       MaxChar = 0xF0000;      // anything above this doesn't have useful properties
+)
+
+var fieldName = []string{
+       "CodePoint",
+       "Name",
+       "GeneralCategory",
+       "CanonicalCombiningClass",
+       "BidiClass",
+       "DecompositionType",
+       "DecompositionMapping",
+       "NumericType",
+       "NumericValue",
+       "BidiMirrored",
+       "Unicode1Name",
+       "ISOComment",
+       "SimpleUppercaseMapping",
+       "SimpleLowercaseMapping",
+       "SimpleTitlecaseMapping"
+}
+
+// This contains only the properties we're interested in.
+type Char struct {
+       field   []string;       // debugging only; could be deleted if we take out char.dump()
+       codePoint       uint32; // redundant (it's the index in the chars table) but useful
+       category        string;
+       numValue        int;
+       upperCase       uint32;
+       lowerCase       uint32;
+       titleCase       uint32;
+}
+
+var chars = make([]Char, MaxChar)
+
+var lastChar uint32 = 0;
+
+func parse(line string) {
+       field := strings.Split(line, ";", -1);
+       if len(field) != NumField {
+               die.Logf("%.5s...: %d fields (expected %d)\n", line, len(field), NumField);
+       }
+       point, err := strconv.Btoui64(field[FCodePoint], 16);
+       if err != nil {
+               die.Log("%.5s...:", err)
+       }
+       lastChar = uint32(point);
+       if point == 0 {
+               return  // not interesting and we use 0 as unset
+       }
+       if point >= MaxChar {
+               fmt.Fprintf(os.Stderr, "ignoring char U+%04x\n", point);
+               return;
+       }
+       char := &chars[point];
+       char.field=field;
+       if char.codePoint != 0 {
+               die.Logf("point U+%04x reused\n");
+       }
+       char.codePoint = lastChar;
+       char.category = field[FGeneralCategory];
+       switch char.category {
+       case "Nd":
+               // Decimal digit
+               v, err := strconv.Atoi(field[FNumericValue]);
+               if err != nil {
+                       die.Log("U+%04x: bad numeric field: %s", point, err);
+               }
+               char.numValue = v;
+       case "Lu":
+               char.letter(field[FCodePoint], field[FSimpleLowercaseMapping], field[FSimpleTitlecaseMapping]);
+       case "Ll":
+               char.letter(field[FSimpleUppercaseMapping], field[FCodePoint], field[FSimpleTitlecaseMapping]);
+       case "Lt":
+               char.letter(field[FSimpleUppercaseMapping], field[FSimpleLowercaseMapping], field[FCodePoint]);
+       case "Lm", "Lo":
+               char.letter(field[FSimpleUppercaseMapping], field[FSimpleLowercaseMapping], field[FSimpleTitlecaseMapping]);
+       }
+}
+
+func (char *Char) dump(s string) {
+       fmt.Print(s, " ");
+       for i:=0;i<len(char.field);i++ {
+               fmt.Printf("%s:%q ", fieldName[i], char.field[i]);
+       }
+       fmt.Print("\n");
+}
+
+func (char *Char) letter(u, l, t string) {
+       char.upperCase = char.letterValue(u, "U");
+       char.lowerCase = char.letterValue(l, "L");
+       char.titleCase = char.letterValue(t, "T");
+}
+
+func (char *Char) letterValue(s string, cas string) uint32 {
+       if s == "" {
+               return 0
+       }
+       v, err := strconv.Btoui64(s, 16);
+       if err != nil {
+               char.dump(cas);
+               die.Logf("U+%04x: bad letter(%s): %s", char.codePoint, s, err)
+       }
+       return uint32(v)
+}
+
+func main() {
+       flag.Parse();
+
+       resp, _, err := http.Get(*url);
+       if err != nil {
+               die.Log(err);
+       }
+       input := bufio.NewReader(resp.Body);
+       for {
+               line, err := input.ReadLineString('\n', false);
+               if err != nil {
+                       if err == os.EOF {
+                               break;
+                       }
+                       die.Log(err);
+               }
+               parse(line);
+       }
+       resp.Body.Close();
+       fmt.Printf(
+               "// Generated by running\n"
+               "//     tables --digits=%t --url=%s\n"
+               "// DO NOT EDIT\n\n"
+               "package unicode\n",
+               *digits,
+               *url
+       );
+       // We generate an UpperCase name to serve as concise documentation and a lowerCase
+       // name to store the data.  This stops godoc dumping all the tables but keeps them
+       // available to clients.
+       if *digits {
+               dumpRange(
+                       "\n// DecimalDigit is the set of Unicode characters with the \"decimal digit\" property.\n"
+                       "var DecimalDigit = decimalDigit\n"
+                       "var decimalDigit = []Range {\n",
+                       func(code int) bool { return chars[code].category == "Nd" },
+                       "}\n"
+               );
+       } else {
+               dumpRange(
+                       "\n// Letter is the set of Unicode letters.\n"
+                       "var Letter = letter\n"
+                       "var letter = []Range {\n",
+                       func(code int) bool {
+                               switch chars[code].category {
+                               case "Lu", "Ll", "Lt", "Lm", "Lo":
+                                       return true
+                               }
+                               return false
+                       },
+                       "}\n"
+               );
+               dumpRange(
+                       "\n// Upper is the set of Unicode upper case letters.\n"
+                       "var Upper = upper\n"
+                       "var upper = []Range {\n",
+                       func(code int) bool { return chars[code].category == "Lu" },
+                       "}\n"
+               );
+               dumpRange(
+                       "\n// Lower is the set of Unicode lower case letters.\n"
+                       "var Lower = lower\n"
+                       "var lower = []Range {\n",
+                       func(code int) bool { return chars[code].category == "Ll" },
+                       "}\n"
+               );
+               dumpRange(
+                       "\n// Title is the set of Unicode title case letters.\n"
+                       "var Title = title\n"
+                       "var title = []Range {\n",
+                       func(code int) bool { return chars[code].category == "Lt" },
+                       "}\n"
+               );
+       }
+}
+
+type Op func(code int) bool
+
+func dumpRange(header string, inCategory Op, trailer string) {
+       fmt.Print(header);
+       const format = "\tRange{0x%04x, 0x%04x, %d},\n";
+       next := 0;
+       // one Range for each iteration
+       for {
+               // look for start of range
+               for next < len(chars) && !inCategory(next) {
+                       next++
+               }
+               if next >= len(chars) {
+                       // no characters remain
+                       break
+               }
+
+               // start of range
+               lo := next;
+               hi := next;
+               stride := 1;
+               // accept lo
+               next++;
+               // look for another character to set the stride
+               for next < len(chars) && !inCategory(next) {
+                       next++
+               }
+               if next >= len(chars) {
+                       // no more characters
+                       fmt.Printf(format, lo, hi, stride);
+                       break;
+               }
+               // set stride
+               stride = next - lo;
+               // check for length of run. next points to first jump in stride
+               for i := next; i < len(chars); i++ {
+                       if inCategory(i) == (((i-lo)%stride) == 0) {
+                               // accept
+                               if inCategory(i) {
+                                       hi = i
+                               }
+                       } else {
+                               // no more characters in this run
+                               break
+                       }
+               }
+               fmt.Printf(format, lo, hi, stride);
+               // next range: start looking where this range ends
+               next = hi + 1;
+       }
+       fmt.Print(trailer);
+}