`
+var categoryMapping = map[string]string{
+ "Lu": "Letter, uppercase",
+ "Ll": "Letter, lowercase",
+ "Lt": "Letter, titlecase",
+ "Lm": "Letter, modifier",
+ "Lo": "Letter, other",
+ "Mn": "Mark, nonspacing",
+ "Mc": "Mark, spacing combining",
+ "Me": "Mark, enclosing",
+ "Nd": "Number, decimal digit",
+ "Nl": "Number, letter",
+ "No": "Number, other",
+ "Pc": "Punctuation, connector",
+ "Pd": "Punctuation, dash",
+ "Ps": "Punctuation, open",
+ "Pe": "Punctuation, close",
+ "Pi": "Punctuation, initial quote",
+ "Pf": "Punctuation, final quote",
+ "Po": "Punctuation, other",
+ "Sm": "Symbol, math",
+ "Sc": "Symbol, currency",
+ "Sk": "Symbol, modifier",
+ "So": "Symbol, other",
+ "Zs": "Separator, space",
+ "Zl": "Separator, line",
+ "Zp": "Separator, paragraph",
+ "Cc": "Other, control",
+ "Cf": "Other, format",
+ "Cs": "Other, surrogate",
+ "Co": "Other, private use",
+ "Cn": "Other, not assigned",
+}
+
func printCategories() {
if *tablelist == "" {
return
varDecl = "\tTitle = _Lt; // Title is the set of Unicode title case letters.\n"
}
if len(name) > 1 {
- varDecl += fmt.Sprintf(
- "\t%s = _%s; // %s is the set of Unicode characters in category %s.\n",
- name, name, name, name)
+ desc, ok := categoryMapping[name]
+ if ok {
+ varDecl += fmt.Sprintf(
+ "\t%s = _%s; // %s is the set of Unicode characters in category %s (%s).\n",
+ name, name, name, name, desc)
+ } else {
+ varDecl += fmt.Sprintf(
+ "\t%s = _%s; // %s is the set of Unicode characters in category %s.\n",
+ name, name, name, name)
+ }
}
decl[ndecl] = varDecl
ndecl++
// These variables have type *RangeTable.
var (
- Cc = _Cc // Cc is the set of Unicode characters in category Cc.
- Cf = _Cf // Cf is the set of Unicode characters in category Cf.
- Co = _Co // Co is the set of Unicode characters in category Co.
- Cs = _Cs // Cs is the set of Unicode characters in category Cs.
+ Cc = _Cc // Cc is the set of Unicode characters in category Cc (Other, control).
+ Cf = _Cf // Cf is the set of Unicode characters in category Cf (Other, format).
+ Co = _Co // Co is the set of Unicode characters in category Co (Other, private use).
+ Cs = _Cs // Cs is the set of Unicode characters in category Cs (Other, surrogate).
Digit = _Nd // Digit is the set of Unicode characters with the "decimal digit" property.
- Nd = _Nd // Nd is the set of Unicode characters in category Nd.
+ Nd = _Nd // Nd is the set of Unicode characters in category Nd (Number, decimal digit).
Letter = _L // Letter/L is the set of Unicode letters, category L.
L = _L
- Lm = _Lm // Lm is the set of Unicode characters in category Lm.
- Lo = _Lo // Lo is the set of Unicode characters in category Lo.
+ Lm = _Lm // Lm is the set of Unicode characters in category Lm (Letter, modifier).
+ Lo = _Lo // Lo is the set of Unicode characters in category Lo (Letter, other).
Lower = _Ll // Lower is the set of Unicode lower case letters.
- Ll = _Ll // Ll is the set of Unicode characters in category Ll.
+ Ll = _Ll // Ll is the set of Unicode characters in category Ll (Letter, lowercase).
Mark = _M // Mark/M is the set of Unicode mark characters, category M.
M = _M
- Mc = _Mc // Mc is the set of Unicode characters in category Mc.
- Me = _Me // Me is the set of Unicode characters in category Me.
- Mn = _Mn // Mn is the set of Unicode characters in category Mn.
- Nl = _Nl // Nl is the set of Unicode characters in category Nl.
- No = _No // No is the set of Unicode characters in category No.
+ Mc = _Mc // Mc is the set of Unicode characters in category Mc (Mark, spacing combining).
+ Me = _Me // Me is the set of Unicode characters in category Me (Mark, enclosing).
+ Mn = _Mn // Mn is the set of Unicode characters in category Mn (Mark, nonspacing).
+ Nl = _Nl // Nl is the set of Unicode characters in category Nl (Number, letter).
+ No = _No // No is the set of Unicode characters in category No (Number, other).
Number = _N // Number/N is the set of Unicode number characters, category N.
N = _N
Other = _C // Other/C is the set of Unicode control and special characters, category C.
C = _C
- Pc = _Pc // Pc is the set of Unicode characters in category Pc.
- Pd = _Pd // Pd is the set of Unicode characters in category Pd.
- Pe = _Pe // Pe is the set of Unicode characters in category Pe.
- Pf = _Pf // Pf is the set of Unicode characters in category Pf.
- Pi = _Pi // Pi is the set of Unicode characters in category Pi.
- Po = _Po // Po is the set of Unicode characters in category Po.
- Ps = _Ps // Ps is the set of Unicode characters in category Ps.
+ Pc = _Pc // Pc is the set of Unicode characters in category Pc (Punctuation, connector).
+ Pd = _Pd // Pd is the set of Unicode characters in category Pd (Punctuation, dash).
+ Pe = _Pe // Pe is the set of Unicode characters in category Pe (Punctuation, close).
+ Pf = _Pf // Pf is the set of Unicode characters in category Pf (Punctuation, final quote).
+ Pi = _Pi // Pi is the set of Unicode characters in category Pi (Punctuation, initial quote).
+ Po = _Po // Po is the set of Unicode characters in category Po (Punctuation, other).
+ Ps = _Ps // Ps is the set of Unicode characters in category Ps (Punctuation, open).
Punct = _P // Punct/P is the set of Unicode punctuation characters, category P.
P = _P
- Sc = _Sc // Sc is the set of Unicode characters in category Sc.
- Sk = _Sk // Sk is the set of Unicode characters in category Sk.
- Sm = _Sm // Sm is the set of Unicode characters in category Sm.
- So = _So // So is the set of Unicode characters in category So.
+ Sc = _Sc // Sc is the set of Unicode characters in category Sc (Symbol, currency).
+ Sk = _Sk // Sk is the set of Unicode characters in category Sk (Symbol, modifier).
+ Sm = _Sm // Sm is the set of Unicode characters in category Sm (Symbol, math).
+ So = _So // So is the set of Unicode characters in category So (Symbol, other).
Space = _Z // Space/Z is the set of Unicode space characters, category Z.
Z = _Z
Symbol = _S // Symbol/S is the set of Unicode symbol characters, category S.
S = _S
Title = _Lt // Title is the set of Unicode title case letters.
- Lt = _Lt // Lt is the set of Unicode characters in category Lt.
+ Lt = _Lt // Lt is the set of Unicode characters in category Lt (Letter, titlecase).
Upper = _Lu // Upper is the set of Unicode upper case letters.
- Lu = _Lu // Lu is the set of Unicode characters in category Lu.
- Zl = _Zl // Zl is the set of Unicode characters in category Zl.
- Zp = _Zp // Zp is the set of Unicode characters in category Zp.
- Zs = _Zs // Zs is the set of Unicode characters in category Zs.
+ Lu = _Lu // Lu is the set of Unicode characters in category Lu (Letter, uppercase).
+ Zl = _Zl // Zl is the set of Unicode characters in category Zl (Separator, line).
+ Zp = _Zp // Zp is the set of Unicode characters in category Zp (Separator, paragraph).
+ Zs = _Zs // Zs is the set of Unicode characters in category Zs (Separator, space).
)
// Generated by running