Cypherpunks repositories - gostls13.git/commitdiff
exp/html/atom: new package.
author Nigel Tao <nigeltao@golang.org>
Thu, 31 May 2012 05:37:18 +0000 (15:37 +1000)
committer Nigel Tao <nigeltao@golang.org>
Thu, 31 May 2012 05:37:18 +0000 (15:37 +1000)
50% fewer mallocs in HTML tokenization, resulting in 25% fewer mallocs
in parsing go1.html.

Making the parser use integer comparisons instead of string comparisons
will be a follow-up CL, to be co-ordinated with Andy Balholm's work.

exp/html benchmarks before/after:

BenchmarkParser      500    4754294 ns/op   16.44 MB/s
        parse_test.go:409: 500 iterations, 14651 mallocs per iteration
BenchmarkRawLevelTokenizer     2000     903481 ns/op   86.51 MB/s
        token_test.go:678: 2000 iterations, 28 mallocs per iteration
BenchmarkLowLevelTokenizer     2000    1260485 ns/op   62.01 MB/s
        token_test.go:678: 2000 iterations, 41 mallocs per iteration
BenchmarkHighLevelTokenizer     1000    2165964 ns/op   36.09 MB/s
        token_test.go:678: 1000 iterations, 6616 mallocs per iteration

BenchmarkParser      500    4664912 ns/op   16.76 MB/s
        parse_test.go:409: 500 iterations, 11266 mallocs per iteration
BenchmarkRawLevelTokenizer     2000     903065 ns/op   86.55 MB/s
        token_test.go:678: 2000 iterations, 28 mallocs per iteration
BenchmarkLowLevelTokenizer     2000    1260032 ns/op   62.03 MB/s
        token_test.go:678: 2000 iterations, 41 mallocs per iteration
BenchmarkHighLevelTokenizer     1000    2143356 ns/op   36.47 MB/s
        token_test.go:678: 1000 iterations, 3231 mallocs per iteration

R=r, rsc, rogpeppe
CC=andybalholm, golang-dev
https://golang.org/cl/6255062

src/pkg/exp/html/atom/atom.go [new file with mode: 0644]
src/pkg/exp/html/atom/atom_test.go [new file with mode: 0644]
src/pkg/exp/html/atom/gen.go [new file with mode: 0644]
src/pkg/exp/html/atom/table.go [new file with mode: 0644]
src/pkg/exp/html/token.go

diff --git a/src/pkg/exp/html/atom/atom.go b/src/pkg/exp/html/atom/atom.go
new file mode 100644 (file)
index 0000000..1ffde98
--- /dev/null
@@ -0,0 +1,88 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package atom provides integer codes (also known as atoms) for a fixed set of
+// frequently occurring HTML strings: lower-case tag names and attribute keys
+// such as "p" and "id".
+//
+// Sharing an atom's string representation between all elements with the same
+// tag can result in fewer string allocations when tokenizing and parsing HTML.
+// Integer comparisons are also generally faster than string comparisons.
+//
+// An atom's particular code (such as atom.Div == 63) is not guaranteed to
+// stay the same between versions of this package. Neither is any ordering
+// guaranteed: whether atom.H1 < atom.H2 may also change. The codes are not
+// guaranteed to be dense. The only guarantees are that e.g. looking up "div"
+// will yield atom.Div, calling atom.Div.String will return "div", and
+// atom.Div != 0.
+package atom
+
+// Atom is an integer code for a string; non-zero codes index into table. The zero value maps to "".
+type Atom int
+
+// String returns the atom's string representation, or "" if a is zero or outside the range 1..max.
+func (a Atom) String() string {
+       if a <= 0 || a > max { // zero, negative, or past the last table entry
+               return ""
+       }
+       return table[a]
+}
+
+// Lookup returns the atom whose name is exactly s, compared byte-wise (so case matters).
+// It returns zero if s is empty or if there is no such atom.
+func Lookup(s []byte) Atom {
+       if len(s) == 0 {
+               return 0
+       }
+       if len(s) == 1 { // one-byte names are resolved by direct indexing, not by searching
+               x := s[0]
+               if x < 'a' || x > 'z' { // oneByteAtoms only covers 'a' through 'z'
+                       return 0
+               }
+               return oneByteAtoms[x-'a'] // zero for letters that are not atoms
+       }
+       // Binary search the sorted table for s. Unlike sort.Search, this returns early on an exact match.
+       // TODO: this could be optimized further. For example, lo and hi could be initialized
+       // from s[0]. Separately, all the "onxxx" atoms could be moved into their own table.
+       lo, hi := Atom(1), 1+max // half-open range [lo, hi); index 0 (the empty string) is skipped
+       for lo < hi {
+               mid := (lo + hi) / 2
+               if cmp := compare(s, table[mid]); cmp == 0 {
+                       return mid
+               } else if cmp > 0 {
+                       lo = mid + 1
+               } else {
+                       hi = mid
+               }
+       }
+       return 0 // range exhausted: s is not an atom
+}
+
+// String returns a string whose contents are equal to s. In that sense, it is equivalent to
+// string(s), but when s names an atom it returns that atom's existing table string instead.
+func String(s []byte) string {
+       if a := Lookup(s); a != 0 {
+               return a.String()
+       }
+       return string(s) // not an atom: fall back to a plain conversion
+}
+
+// compare is like bytes.Compare, except that it takes one []byte argument and one string argument,
+// and returns negative/0/positive (not necessarily -1/0/+1) as s sorts before, equal to, or after t.
+func compare(s []byte, t string) int {
+       n := len(s) // n ends up as min(len(s), len(t)): the length of the prefix both share
+       if n > len(t) {
+               n = len(t)
+       }
+       for i, si := range s[:n] {
+               ti := t[i]
+               switch {
+               case si > ti:
+                       return +1
+               case si < ti:
+                       return -1
+               }
+       }
+       return len(s) - len(t) // the first n bytes are equal, so the shorter input sorts first
+}
diff --git a/src/pkg/exp/html/atom/atom_test.go b/src/pkg/exp/html/atom/atom_test.go
new file mode 100644 (file)
index 0000000..e494086
--- /dev/null
@@ -0,0 +1,52 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package atom
+
+import (
+       "testing"
+)
+
+func TestHits(t *testing.T) { // every table entry must look up to its own index
+       for i, s := range table { // includes index 0, the empty string, for which Lookup returns 0
+               got := Lookup([]byte(s))
+               if got != Atom(i) {
+                       t.Errorf("Lookup(%q): got %d, want %d", s, got, i)
+               }
+       }
+}
+
+func TestMisses(t *testing.T) { // strings that are not atoms must all look up to zero
+       testCases := []string{ // empty, wrong case, near-miss spellings, extra bytes, non-ASCII
+               "",
+               "\x00",
+               "\xff",
+               "A",
+               "DIV",
+               "Div",
+               "dIV",
+               "aa",
+               "a\x00",
+               "ab",
+               "abb",
+               "abbr0",
+               "abbr ",
+               " abbr",
+               " a",
+               "acceptcharset",
+               "acceptCharset",
+               "accept_charset",
+               "h0",
+               "h1h2",
+               "h7",
+               "onClick",
+               "λ",
+       }
+       for _, tc := range testCases {
+               got := Lookup([]byte(tc))
+               if got != 0 {
+                       t.Errorf("Lookup(%q): got %d, want 0", tc, got)
+               }
+       }
+}
diff --git a/src/pkg/exp/html/atom/gen.go b/src/pkg/exp/html/atom/gen.go
new file mode 100644 (file)
index 0000000..176c26e
--- /dev/null
@@ -0,0 +1,405 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+package main
+
+// This program generates table.go
+// Invoke as
+//
+//     go run gen.go |gofmt >table.go
+
+import (
+       "fmt"
+       "sort"
+)
+
+// identifier converts s to a Go exported identifier: each '-' is dropped and a lower-case ASCII letter
+// at the start of s or right after a '-' is upper-cased. "div" -> "Div", "accept-charset" -> "AcceptCharset".
+func identifier(s string) string {
+       b := make([]byte, 0, len(s))
+       cap := true // true while the next character starts a word; NOTE(review): shadows the builtin cap
+       for _, c := range s {
+               if c == '-' {
+                       cap = true
+                       continue
+               }
+               if cap && 'a' <= c && c <= 'z' {
+                       c -= 'a' - 'A'
+               }
+               cap = false
+               b = append(b, byte(c)) // NOTE(review): byte(c) would truncate a non-ASCII rune; the lists in this file are ASCII
+       }
+       return string(b)
+}
+
+func main() { // prints the generated Go source (constants, max, table, oneByteAtoms) to standard output
+       m := map[string]bool{ // set of atom strings; a name present in several lists is stored once
+               "": true, // the empty string sorts first, so it becomes index 0
+       }
+       for _, list := range [][]string{elements, attributes, eventHandlers, extra} {
+               for _, s := range list {
+                       m[s] = true
+               }
+       }
+       atoms := make([]string, 0, len(m))
+       for s := range m {
+               atoms = append(atoms, s)
+       }
+       sort.Strings(atoms) // an atom's code is its position in this sorted order
+
+       byInt := []string{}
+       byStr := map[string]int{}
+       ident := []string{}
+       for i, s := range atoms {
+               byInt = append(byInt, s)
+               byStr[s] = i
+               ident = append(ident, identifier(s))
+       }
+
+       fmt.Printf("package atom\n\nconst (\n")
+       for i, _ := range byInt {
+               if i == 0 { // index 0 is "", which gets no named constant
+                       continue
+               }
+               fmt.Printf("\t%s Atom = %d\n", ident[i], i)
+       }
+       fmt.Printf(")\n\n")
+       fmt.Printf("const max Atom = %d\n\n", len(byInt)-1)
+       fmt.Printf("var table = []string{\n")
+       for _, s := range byInt {
+               fmt.Printf("\t%q,\n", s)
+       }
+       fmt.Printf("}\n\n")
+       fmt.Printf("var oneByteAtoms = [26]Atom{\n")
+       for i := 'a'; i <= 'z'; i++ {
+               val := "0"
+               if x := byStr[string(i)]; x != 0 { // x is 0 when the letter is not an atom (missing map key)
+                       val = ident[x]
+               }
+               fmt.Printf("\t%s,\n", val)
+       }
+       fmt.Printf("}\n\n")
+}
+
+// The lists of element names and attribute keys were taken from
+// http://www.whatwg.org/specs/web-apps/current-work/multipage/section-index.html
+// as of the "HTML Living Standard - Last Updated 30 May 2012" version.
+
+var elements = []string{
+       "a",
+       "abbr",
+       "address",
+       "area",
+       "article",
+       "aside",
+       "audio",
+       "b",
+       "base",
+       "bdi",
+       "bdo",
+       "blockquote",
+       "body",
+       "br",
+       "button",
+       "canvas",
+       "caption",
+       "cite",
+       "code",
+       "col",
+       "colgroup",
+       "command",
+       "data",
+       "datalist",
+       "dd",
+       "del",
+       "details",
+       "dfn",
+       "dialog",
+       "div",
+       "dl",
+       "dt",
+       "em",
+       "embed",
+       "fieldset",
+       "figcaption",
+       "figure",
+       "footer",
+       "form",
+       "h1",
+       "h2",
+       "h3",
+       "h4",
+       "h5",
+       "h6",
+       "head",
+       "header",
+       "hgroup",
+       "hr",
+       "html",
+       "i",
+       "iframe",
+       "img",
+       "input",
+       "ins",
+       "kbd",
+       "keygen",
+       "label",
+       "legend",
+       "li",
+       "link",
+       "map",
+       "mark",
+       "menu",
+       "meta",
+       "meter",
+       "nav",
+       "noscript",
+       "object",
+       "ol",
+       "optgroup",
+       "option",
+       "output",
+       "p",
+       "param",
+       "pre",
+       "progress",
+       "q",
+       "rp",
+       "rt",
+       "ruby",
+       "s",
+       "samp",
+       "script",
+       "section",
+       "select",
+       "small",
+       "source",
+       "span",
+       "strong",
+       "style",
+       "sub",
+       "summary",
+       "sup",
+       "table",
+       "tbody",
+       "td",
+       "textarea",
+       "tfoot",
+       "th",
+       "thead",
+       "time",
+       "title",
+       "tr",
+       "track",
+       "u",
+       "ul",
+       "var",
+       "video",
+       "wbr",
+}
+
+var attributes = []string{
+       "accept",
+       "accept-charset",
+       "accesskey",
+       "action",
+       "alt",
+       "async",
+       "autocomplete",
+       "autofocus",
+       "autoplay",
+       "border",
+       "challenge",
+       "charset",
+       "checked",
+       "cite",
+       "class",
+       "cols",
+       "colspan",
+       "command",
+       "content",
+       "contenteditable",
+       "contextmenu",
+       "controls",
+       "coords",
+       "crossorigin",
+       "data",
+       "datetime",
+       "default",
+       "defer",
+       "dir",
+       "dirname",
+       "disabled",
+       "download",
+       "draggable",
+       "dropzone",
+       "enctype",
+       "for",
+       "form",
+       "formaction",
+       "formenctype",
+       "formmethod",
+       "formnovalidate",
+       "formtarget",
+       "headers",
+       "height",
+       "hidden",
+       "high",
+       "href",
+       "hreflang",
+       "http-equiv",
+       "icon",
+       "id",
+       "inert",
+       "ismap",
+       "itemid",
+       "itemprop",
+       "itemref",
+       "itemscope",
+       "itemtype",
+       "keytype",
+       "kind",
+       "label",
+       "lang",
+       "list",
+       "loop",
+       "low",
+       "manifest",
+       "max",
+       "maxlength",
+       "media",
+       "mediagroup",
+       "method",
+       "min",
+       "multiple",
+       "muted",
+       "name",
+       "novalidate",
+       "open",
+       "optimum",
+       "pattern",
+       "ping",
+       "placeholder",
+       "poster",
+       "preload",
+       "radiogroup",
+       "readonly",
+       "rel",
+       "required",
+       "reversed",
+       "rows",
+       "rowspan",
+       "sandbox",
+       "spellcheck",
+       "scope",
+       "scoped",
+       "seamless",
+       "selected",
+       "shape",
+       "size",
+       "sizes",
+       "span",
+       "src",
+       "srcdoc",
+       "srclang",
+       "start",
+       "step",
+       "style",
+       "tabindex",
+       "target",
+       "title",
+       "translate",
+       "type",
+       "typemustmatch",
+       "usemap",
+       "value",
+       "width",
+       "wrap",
+}
+
+var eventHandlers = []string{
+       "onabort",
+       "onafterprint",
+       "onbeforeprint",
+       "onbeforeunload",
+       "onblur",
+       "oncancel",
+       "oncanplay",
+       "oncanplaythrough",
+       "onchange",
+       "onclick",
+       "onclose",
+       "oncontextmenu",
+       "oncuechange",
+       "ondblclick",
+       "ondrag",
+       "ondragend",
+       "ondragenter",
+       "ondragleave",
+       "ondragover",
+       "ondragstart",
+       "ondrop",
+       "ondurationchange",
+       "onemptied",
+       "onended",
+       "onerror",
+       "onfocus",
+       "onhashchange",
+       "oninput",
+       "oninvalid",
+       "onkeydown",
+       "onkeypress",
+       "onkeyup",
+       "onload",
+       "onloadeddata",
+       "onloadedmetadata",
+       "onloadstart",
+       "onmessage",
+       "onmousedown",
+       "onmousemove",
+       "onmouseout",
+       "onmouseover",
+       "onmouseup",
+       "onmousewheel",
+       "onoffline",
+       "ononline",
+       "onpagehide",
+       "onpageshow",
+       "onpause",
+       "onplay",
+       "onplaying",
+       "onpopstate",
+       "onprogress",
+       "onratechange",
+       "onreset",
+       "onresize",
+       "onscroll",
+       "onseeked",
+       "onseeking",
+       "onselect",
+       "onshow",
+       "onstalled",
+       "onstorage",
+       "onsubmit",
+       "onsuspend",
+       "ontimeupdate",
+       "onunload",
+       "onvolumechange",
+       "onwaiting",
+}
+
+// extra are ad-hoc values not covered by any of the lists above.
+var extra = []string{
+       "align",
+       "annotation",
+       "applet",
+       "center",
+       "color",
+       "font",
+       "frame",
+       "frameset",
+       "nobr",
+}
diff --git a/src/pkg/exp/html/atom/table.go b/src/pkg/exp/html/atom/table.go
new file mode 100644 (file)
index 0000000..8300cd2
--- /dev/null
@@ -0,0 +1,629 @@
+package atom
+
+const (
+       A                Atom = 1
+       Abbr             Atom = 2
+       Accept           Atom = 3
+       AcceptCharset    Atom = 4
+       Accesskey        Atom = 5
+       Action           Atom = 6
+       Address          Atom = 7
+       Align            Atom = 8
+       Alt              Atom = 9
+       Annotation       Atom = 10
+       Applet           Atom = 11
+       Area             Atom = 12
+       Article          Atom = 13
+       Aside            Atom = 14
+       Async            Atom = 15
+       Audio            Atom = 16
+       Autocomplete     Atom = 17
+       Autofocus        Atom = 18
+       Autoplay         Atom = 19
+       B                Atom = 20
+       Base             Atom = 21
+       Bdi              Atom = 22
+       Bdo              Atom = 23
+       Blockquote       Atom = 24
+       Body             Atom = 25
+       Border           Atom = 26
+       Br               Atom = 27
+       Button           Atom = 28
+       Canvas           Atom = 29
+       Caption          Atom = 30
+       Center           Atom = 31
+       Challenge        Atom = 32
+       Charset          Atom = 33
+       Checked          Atom = 34
+       Cite             Atom = 35
+       Class            Atom = 36
+       Code             Atom = 37
+       Col              Atom = 38
+       Colgroup         Atom = 39
+       Color            Atom = 40
+       Cols             Atom = 41
+       Colspan          Atom = 42
+       Command          Atom = 43
+       Content          Atom = 44
+       Contenteditable  Atom = 45
+       Contextmenu      Atom = 46
+       Controls         Atom = 47
+       Coords           Atom = 48
+       Crossorigin      Atom = 49
+       Data             Atom = 50
+       Datalist         Atom = 51
+       Datetime         Atom = 52
+       Dd               Atom = 53
+       Default          Atom = 54
+       Defer            Atom = 55
+       Del              Atom = 56
+       Details          Atom = 57
+       Dfn              Atom = 58
+       Dialog           Atom = 59
+       Dir              Atom = 60
+       Dirname          Atom = 61
+       Disabled         Atom = 62
+       Div              Atom = 63
+       Dl               Atom = 64
+       Download         Atom = 65
+       Draggable        Atom = 66
+       Dropzone         Atom = 67
+       Dt               Atom = 68
+       Em               Atom = 69
+       Embed            Atom = 70
+       Enctype          Atom = 71
+       Fieldset         Atom = 72
+       Figcaption       Atom = 73
+       Figure           Atom = 74
+       Font             Atom = 75
+       Footer           Atom = 76
+       For              Atom = 77
+       Form             Atom = 78
+       Formaction       Atom = 79
+       Formenctype      Atom = 80
+       Formmethod       Atom = 81
+       Formnovalidate   Atom = 82
+       Formtarget       Atom = 83
+       Frame            Atom = 84
+       Frameset         Atom = 85
+       H1               Atom = 86
+       H2               Atom = 87
+       H3               Atom = 88
+       H4               Atom = 89
+       H5               Atom = 90
+       H6               Atom = 91
+       Head             Atom = 92
+       Header           Atom = 93
+       Headers          Atom = 94
+       Height           Atom = 95
+       Hgroup           Atom = 96
+       Hidden           Atom = 97
+       High             Atom = 98
+       Hr               Atom = 99
+       Href             Atom = 100
+       Hreflang         Atom = 101
+       Html             Atom = 102
+       HttpEquiv        Atom = 103
+       I                Atom = 104
+       Icon             Atom = 105
+       Id               Atom = 106
+       Iframe           Atom = 107
+       Img              Atom = 108
+       Inert            Atom = 109
+       Input            Atom = 110
+       Ins              Atom = 111
+       Ismap            Atom = 112
+       Itemid           Atom = 113
+       Itemprop         Atom = 114
+       Itemref          Atom = 115
+       Itemscope        Atom = 116
+       Itemtype         Atom = 117
+       Kbd              Atom = 118
+       Keygen           Atom = 119
+       Keytype          Atom = 120
+       Kind             Atom = 121
+       Label            Atom = 122
+       Lang             Atom = 123
+       Legend           Atom = 124
+       Li               Atom = 125
+       Link             Atom = 126
+       List             Atom = 127
+       Loop             Atom = 128
+       Low              Atom = 129
+       Manifest         Atom = 130
+       Map              Atom = 131
+       Mark             Atom = 132
+       Max              Atom = 133
+       Maxlength        Atom = 134
+       Media            Atom = 135
+       Mediagroup       Atom = 136
+       Menu             Atom = 137
+       Meta             Atom = 138
+       Meter            Atom = 139
+       Method           Atom = 140
+       Min              Atom = 141
+       Multiple         Atom = 142
+       Muted            Atom = 143
+       Name             Atom = 144
+       Nav              Atom = 145
+       Nobr             Atom = 146
+       Noscript         Atom = 147
+       Novalidate       Atom = 148
+       Object           Atom = 149
+       Ol               Atom = 150
+       Onabort          Atom = 151
+       Onafterprint     Atom = 152
+       Onbeforeprint    Atom = 153
+       Onbeforeunload   Atom = 154
+       Onblur           Atom = 155
+       Oncancel         Atom = 156
+       Oncanplay        Atom = 157
+       Oncanplaythrough Atom = 158
+       Onchange         Atom = 159
+       Onclick          Atom = 160
+       Onclose          Atom = 161
+       Oncontextmenu    Atom = 162
+       Oncuechange      Atom = 163
+       Ondblclick       Atom = 164
+       Ondrag           Atom = 165
+       Ondragend        Atom = 166
+       Ondragenter      Atom = 167
+       Ondragleave      Atom = 168
+       Ondragover       Atom = 169
+       Ondragstart      Atom = 170
+       Ondrop           Atom = 171
+       Ondurationchange Atom = 172
+       Onemptied        Atom = 173
+       Onended          Atom = 174
+       Onerror          Atom = 175
+       Onfocus          Atom = 176
+       Onhashchange     Atom = 177
+       Oninput          Atom = 178
+       Oninvalid        Atom = 179
+       Onkeydown        Atom = 180
+       Onkeypress       Atom = 181
+       Onkeyup          Atom = 182
+       Onload           Atom = 183
+       Onloadeddata     Atom = 184
+       Onloadedmetadata Atom = 185
+       Onloadstart      Atom = 186
+       Onmessage        Atom = 187
+       Onmousedown      Atom = 188
+       Onmousemove      Atom = 189
+       Onmouseout       Atom = 190
+       Onmouseover      Atom = 191
+       Onmouseup        Atom = 192
+       Onmousewheel     Atom = 193
+       Onoffline        Atom = 194
+       Ononline         Atom = 195
+       Onpagehide       Atom = 196
+       Onpageshow       Atom = 197
+       Onpause          Atom = 198
+       Onplay           Atom = 199
+       Onplaying        Atom = 200
+       Onpopstate       Atom = 201
+       Onprogress       Atom = 202
+       Onratechange     Atom = 203
+       Onreset          Atom = 204
+       Onresize         Atom = 205
+       Onscroll         Atom = 206
+       Onseeked         Atom = 207
+       Onseeking        Atom = 208
+       Onselect         Atom = 209
+       Onshow           Atom = 210
+       Onstalled        Atom = 211
+       Onstorage        Atom = 212
+       Onsubmit         Atom = 213
+       Onsuspend        Atom = 214
+       Ontimeupdate     Atom = 215
+       Onunload         Atom = 216
+       Onvolumechange   Atom = 217
+       Onwaiting        Atom = 218
+       Open             Atom = 219
+       Optgroup         Atom = 220
+       Optimum          Atom = 221
+       Option           Atom = 222
+       Output           Atom = 223
+       P                Atom = 224
+       Param            Atom = 225
+       Pattern          Atom = 226
+       Ping             Atom = 227
+       Placeholder      Atom = 228
+       Poster           Atom = 229
+       Pre              Atom = 230
+       Preload          Atom = 231
+       Progress         Atom = 232
+       Q                Atom = 233
+       Radiogroup       Atom = 234
+       Readonly         Atom = 235
+       Rel              Atom = 236
+       Required         Atom = 237
+       Reversed         Atom = 238
+       Rows             Atom = 239
+       Rowspan          Atom = 240
+       Rp               Atom = 241
+       Rt               Atom = 242
+       Ruby             Atom = 243
+       S                Atom = 244
+       Samp             Atom = 245
+       Sandbox          Atom = 246
+       Scope            Atom = 247
+       Scoped           Atom = 248
+       Script           Atom = 249
+       Seamless         Atom = 250
+       Section          Atom = 251
+       Select           Atom = 252
+       Selected         Atom = 253
+       Shape            Atom = 254
+       Size             Atom = 255
+       Sizes            Atom = 256
+       Small            Atom = 257
+       Source           Atom = 258
+       Span             Atom = 259
+       Spellcheck       Atom = 260
+       Src              Atom = 261
+       Srcdoc           Atom = 262
+       Srclang          Atom = 263
+       Start            Atom = 264
+       Step             Atom = 265
+       Strong           Atom = 266
+       Style            Atom = 267
+       Sub              Atom = 268
+       Summary          Atom = 269
+       Sup              Atom = 270
+       Tabindex         Atom = 271
+       Table            Atom = 272
+       Target           Atom = 273
+       Tbody            Atom = 274
+       Td               Atom = 275
+       Textarea         Atom = 276
+       Tfoot            Atom = 277
+       Th               Atom = 278
+       Thead            Atom = 279
+       Time             Atom = 280
+       Title            Atom = 281
+       Tr               Atom = 282
+       Track            Atom = 283
+       Translate        Atom = 284
+       Type             Atom = 285
+       Typemustmatch    Atom = 286
+       U                Atom = 287
+       Ul               Atom = 288
+       Usemap           Atom = 289
+       Value            Atom = 290
+       Var              Atom = 291
+       Video            Atom = 292
+       Wbr              Atom = 293
+       Width            Atom = 294
+       Wrap             Atom = 295
+)
+
+const max Atom = 295
+
+var table = []string{
+       "",
+       "a",
+       "abbr",
+       "accept",
+       "accept-charset",
+       "accesskey",
+       "action",
+       "address",
+       "align",
+       "alt",
+       "annotation",
+       "applet",
+       "area",
+       "article",
+       "aside",
+       "async",
+       "audio",
+       "autocomplete",
+       "autofocus",
+       "autoplay",
+       "b",
+       "base",
+       "bdi",
+       "bdo",
+       "blockquote",
+       "body",
+       "border",
+       "br",
+       "button",
+       "canvas",
+       "caption",
+       "center",
+       "challenge",
+       "charset",
+       "checked",
+       "cite",
+       "class",
+       "code",
+       "col",
+       "colgroup",
+       "color",
+       "cols",
+       "colspan",
+       "command",
+       "content",
+       "contenteditable",
+       "contextmenu",
+       "controls",
+       "coords",
+       "crossorigin",
+       "data",
+       "datalist",
+       "datetime",
+       "dd",
+       "default",
+       "defer",
+       "del",
+       "details",
+       "dfn",
+       "dialog",
+       "dir",
+       "dirname",
+       "disabled",
+       "div",
+       "dl",
+       "download",
+       "draggable",
+       "dropzone",
+       "dt",
+       "em",
+       "embed",
+       "enctype",
+       "fieldset",
+       "figcaption",
+       "figure",
+       "font",
+       "footer",
+       "for",
+       "form",
+       "formaction",
+       "formenctype",
+       "formmethod",
+       "formnovalidate",
+       "formtarget",
+       "frame",
+       "frameset",
+       "h1",
+       "h2",
+       "h3",
+       "h4",
+       "h5",
+       "h6",
+       "head",
+       "header",
+       "headers",
+       "height",
+       "hgroup",
+       "hidden",
+       "high",
+       "hr",
+       "href",
+       "hreflang",
+       "html",
+       "http-equiv",
+       "i",
+       "icon",
+       "id",
+       "iframe",
+       "img",
+       "inert",
+       "input",
+       "ins",
+       "ismap",
+       "itemid",
+       "itemprop",
+       "itemref",
+       "itemscope",
+       "itemtype",
+       "kbd",
+       "keygen",
+       "keytype",
+       "kind",
+       "label",
+       "lang",
+       "legend",
+       "li",
+       "link",
+       "list",
+       "loop",
+       "low",
+       "manifest",
+       "map",
+       "mark",
+       "max",
+       "maxlength",
+       "media",
+       "mediagroup",
+       "menu",
+       "meta",
+       "meter",
+       "method",
+       "min",
+       "multiple",
+       "muted",
+       "name",
+       "nav",
+       "nobr",
+       "noscript",
+       "novalidate",
+       "object",
+       "ol",
+       "onabort",
+       "onafterprint",
+       "onbeforeprint",
+       "onbeforeunload",
+       "onblur",
+       "oncancel",
+       "oncanplay",
+       "oncanplaythrough",
+       "onchange",
+       "onclick",
+       "onclose",
+       "oncontextmenu",
+       "oncuechange",
+       "ondblclick",
+       "ondrag",
+       "ondragend",
+       "ondragenter",
+       "ondragleave",
+       "ondragover",
+       "ondragstart",
+       "ondrop",
+       "ondurationchange",
+       "onemptied",
+       "onended",
+       "onerror",
+       "onfocus",
+       "onhashchange",
+       "oninput",
+       "oninvalid",
+       "onkeydown",
+       "onkeypress",
+       "onkeyup",
+       "onload",
+       "onloadeddata",
+       "onloadedmetadata",
+       "onloadstart",
+       "onmessage",
+       "onmousedown",
+       "onmousemove",
+       "onmouseout",
+       "onmouseover",
+       "onmouseup",
+       "onmousewheel",
+       "onoffline",
+       "ononline",
+       "onpagehide",
+       "onpageshow",
+       "onpause",
+       "onplay",
+       "onplaying",
+       "onpopstate",
+       "onprogress",
+       "onratechange",
+       "onreset",
+       "onresize",
+       "onscroll",
+       "onseeked",
+       "onseeking",
+       "onselect",
+       "onshow",
+       "onstalled",
+       "onstorage",
+       "onsubmit",
+       "onsuspend",
+       "ontimeupdate",
+       "onunload",
+       "onvolumechange",
+       "onwaiting",
+       "open",
+       "optgroup",
+       "optimum",
+       "option",
+       "output",
+       "p",
+       "param",
+       "pattern",
+       "ping",
+       "placeholder",
+       "poster",
+       "pre",
+       "preload",
+       "progress",
+       "q",
+       "radiogroup",
+       "readonly",
+       "rel",
+       "required",
+       "reversed",
+       "rows",
+       "rowspan",
+       "rp",
+       "rt",
+       "ruby",
+       "s",
+       "samp",
+       "sandbox",
+       "scope",
+       "scoped",
+       "script",
+       "seamless",
+       "section",
+       "select",
+       "selected",
+       "shape",
+       "size",
+       "sizes",
+       "small",
+       "source",
+       "span",
+       "spellcheck",
+       "src",
+       "srcdoc",
+       "srclang",
+       "start",
+       "step",
+       "strong",
+       "style",
+       "sub",
+       "summary",
+       "sup",
+       "tabindex",
+       "table",
+       "target",
+       "tbody",
+       "td",
+       "textarea",
+       "tfoot",
+       "th",
+       "thead",
+       "time",
+       "title",
+       "tr",
+       "track",
+       "translate",
+       "type",
+       "typemustmatch",
+       "u",
+       "ul",
+       "usemap",
+       "value",
+       "var",
+       "video",
+       "wbr",
+       "width",
+       "wrap",
+}
+
+var oneByteAtoms = [26]Atom{
+       A,
+       B,
+       0,
+       0,
+       0,
+       0,
+       0,
+       0,
+       I,
+       0,
+       0,
+       0,
+       0,
+       0,
+       0,
+       P,
+       Q,
+       0,
+       S,
+       0,
+       U,
+       0,
+       0,
+       0,
+       0,
+       0,
+}
diff --git a/src/pkg/exp/html/token.go b/src/pkg/exp/html/token.go
index c9ab6e0761ad99ef7d6d05f4a1ca726128eb5947..632ba8d2f2ef8ece1d67fdc7267e6e4f6e0b424c 100644 (file)
@@ -6,6 +6,7 @@ package html
 
 import (
        "bytes"
+       "exp/html/atom"
        "io"
        "strconv"
        "strings"
@@ -791,13 +792,13 @@ func (z *Tokenizer) Token() Token {
                for moreAttr {
                        var key, val []byte
                        key, val, moreAttr = z.TagAttr()
-                       attr = append(attr, Attribute{"", string(key), string(val)})
+                       attr = append(attr, Attribute{"", atom.String(key), string(val)})
                }
-               t.Data = string(name)
+               t.Data = atom.String(name)
                t.Attr = attr
        case EndTagToken:
                name, _ := z.TagName()
-               t.Data = string(name)
+               t.Data = atom.String(name)
        }
        return t
 }