From: Nigel Tao Date: Thu, 31 May 2012 05:37:18 +0000 (+1000) Subject: exp/html/atom: new package. X-Git-Tag: go1.1rc2~3041 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=bb4a817a923f9a0ea59bbaec84e11b924e568442;p=gostls13.git exp/html/atom: new package. 50% fewer mallocs in HTML tokenization, resulting in 25% fewer mallocs in parsing go1.html. Making the parser use integer comparisons instead of string comparisons will be a follow-up CL, to be co-ordinated with Andy Balholm's work. exp/html benchmarks before/after: BenchmarkParser 500 4754294 ns/op 16.44 MB/s parse_test.go:409: 500 iterations, 14651 mallocs per iteration BenchmarkRawLevelTokenizer 2000 903481 ns/op 86.51 MB/s token_test.go:678: 2000 iterations, 28 mallocs per iteration BenchmarkLowLevelTokenizer 2000 1260485 ns/op 62.01 MB/s token_test.go:678: 2000 iterations, 41 mallocs per iteration BenchmarkHighLevelTokenizer 1000 2165964 ns/op 36.09 MB/s token_test.go:678: 1000 iterations, 6616 mallocs per iteration BenchmarkParser 500 4664912 ns/op 16.76 MB/s parse_test.go:409: 500 iterations, 11266 mallocs per iteration BenchmarkRawLevelTokenizer 2000 903065 ns/op 86.55 MB/s token_test.go:678: 2000 iterations, 28 mallocs per iteration BenchmarkLowLevelTokenizer 2000 1260032 ns/op 62.03 MB/s token_test.go:678: 2000 iterations, 41 mallocs per iteration BenchmarkHighLevelTokenizer 1000 2143356 ns/op 36.47 MB/s token_test.go:678: 1000 iterations, 3231 mallocs per iteration R=r, rsc, rogpeppe CC=andybalholm, golang-dev https://golang.org/cl/6255062 --- diff --git a/src/pkg/exp/html/atom/atom.go b/src/pkg/exp/html/atom/atom.go new file mode 100644 index 0000000000..1ffde98471 --- /dev/null +++ b/src/pkg/exp/html/atom/atom.go @@ -0,0 +1,88 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package atom provides integer codes (also known as atoms) for a fixed set of +// frequently occurring HTML strings: lower-case tag names and attribute keys +// such as "p" and "id". +// +// Sharing an atom's string representation between all elements with the same +// tag can result in fewer string allocations when tokenizing and parsing HTML. +// Integer comparisons are also generally faster than string comparisons. +// +// An atom's particular code (such as atom.Div == 63) is not guaranteed to +// stay the same between versions of this package. Neither is any ordering +// guaranteed: whether atom.H1 < atom.H2 may also change. The codes are not +// guaranteed to be dense. The only guarantees are that e.g. looking up "div" +// will yield atom.Div, calling atom.Div.String will return "div", and +// atom.Div != 0. +package atom + +// Atom is an integer code for a string. The zero value maps to "". +type Atom int + +// String returns the atom's string representation. +func (a Atom) String() string { + if a <= 0 || a > max { + return "" + } + return table[a] +} + +// Lookup returns the atom whose name is s. It returns zero if there is no +// such atom. +func Lookup(s []byte) Atom { + if len(s) == 0 { + return 0 + } + if len(s) == 1 { + x := s[0] + if x < 'a' || x > 'z' { + return 0 + } + return oneByteAtoms[x-'a'] + } + // Binary search for the atom. Unlike sort.Search, this returns early on an exact match. + // TODO: this could be optimized further. For example, lo and hi could be initialized + // from s[0]. Separately, all the "onxxx" atoms could be moved into their own table. + lo, hi := Atom(1), 1+max + for lo < hi { + mid := (lo + hi) / 2 + if cmp := compare(s, table[mid]); cmp == 0 { + return mid + } else if cmp > 0 { + lo = mid + 1 + } else { + hi = mid + } + } + return 0 +} + +// String returns a string whose contents are equal to s. In that sense, it is +// equivalent to string(s), but may be more efficient. +func String(s []byte) string { + if a := Lookup(s); a != 0 { + return a.String() + } + return string(s) +} + +// compare is like bytes.Compare, except that it takes one []byte argument and +// one string argument, and returns negative/0/positive instead of -1/0/+1. +func compare(s []byte, t string) int { + n := len(s) + if n > len(t) { + n = len(t) + } + for i, si := range s[:n] { + ti := t[i] + switch { + case si > ti: + return +1 + case si < ti: + return -1 + } + } + return len(s) - len(t) +} diff --git a/src/pkg/exp/html/atom/atom_test.go b/src/pkg/exp/html/atom/atom_test.go new file mode 100644 index 0000000000..e4940865d0 --- /dev/null +++ b/src/pkg/exp/html/atom/atom_test.go @@ -0,0 +1,52 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package atom + +import ( + "testing" +) + +func TestHits(t *testing.T) { + for i, s := range table { + got := Lookup([]byte(s)) + if got != Atom(i) { + t.Errorf("Lookup(%q): got %d, want %d", s, got, i) + } + } +} + +func TestMisses(t *testing.T) { + testCases := []string{ + "", + "\x00", + "\xff", + "A", + "DIV", + "Div", + "dIV", + "aa", + "a\x00", + "ab", + "abb", + "abbr0", + "abbr ", + " abbr", + " a", + "acceptcharset", + "acceptCharset", + "accept_charset", + "h0", + "h1h2", + "h7", + "onClick", + "λ", + } + for _, tc := range testCases { + got := Lookup([]byte(tc)) + if got != 0 { + t.Errorf("Lookup(%q): got %d, want 0", tc, got) + } + } +} diff --git a/src/pkg/exp/html/atom/gen.go b/src/pkg/exp/html/atom/gen.go new file mode 100644 index 0000000000..176c26ec3d --- /dev/null +++ b/src/pkg/exp/html/atom/gen.go @@ -0,0 +1,405 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build ignore + +package main + +// This program generates table.go +// Invoke as +// +// go run gen.go |gofmt >table.go + +import ( + "fmt" + "sort" +) + +// identifier converts s to a Go exported identifier. +// It converts "div" to "Div" and "accept-charset" to "AcceptCharset". +func identifier(s string) string { + b := make([]byte, 0, len(s)) + cap := true + for _, c := range s { + if c == '-' { + cap = true + continue + } + if cap && 'a' <= c && c <= 'z' { + c -= 'a' - 'A' + } + cap = false + b = append(b, byte(c)) + } + return string(b) +} + +func main() { + m := map[string]bool{ + "": true, + } + for _, list := range [][]string{elements, attributes, eventHandlers, extra} { + for _, s := range list { + m[s] = true + } + } + atoms := make([]string, 0, len(m)) + for s := range m { + atoms = append(atoms, s) + } + sort.Strings(atoms) + + byInt := []string{} + byStr := map[string]int{} + ident := []string{} + for i, s := range atoms { + byInt = append(byInt, s) + byStr[s] = i + ident = append(ident, identifier(s)) + } + + fmt.Printf("package atom\n\nconst (\n") + for i, _ := range byInt { + if i == 0 { + continue + } + fmt.Printf("\t%s Atom = %d\n", ident[i], i) + } + fmt.Printf(")\n\n") + fmt.Printf("const max Atom = %d\n\n", len(byInt)-1) + fmt.Printf("var table = []string{\n") + for _, s := range byInt { + fmt.Printf("\t%q,\n", s) + } + fmt.Printf("}\n\n") + fmt.Printf("var oneByteAtoms = [26]Atom{\n") + for i := 'a'; i <= 'z'; i++ { + val := "0" + if x := byStr[string(i)]; x != 0 { + val = ident[x] + } + fmt.Printf("\t%s,\n", val) + } + fmt.Printf("}\n\n") +} + +// The lists of element names and attribute keys were taken from +// http://www.whatwg.org/specs/web-apps/current-work/multipage/section-index.html +// as of the "HTML Living Standard - Last Updated 30 May 2012" version. + +var elements = []string{ + "a", + "abbr", + "address", + "area", + "article", + "aside", + "audio", + "b", + "base", + "bdi", + "bdo", + "blockquote", + "body", + "br", + "button", + "canvas", + "caption", + "cite", + "code", + "col", + "colgroup", + "command", + "data", + "datalist", + "dd", + "del", + "details", + "dfn", + "dialog", + "div", + "dl", + "dt", + "em", + "embed", + "fieldset", + "figcaption", + "figure", + "footer", + "form", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "head", + "header", + "hgroup", + "hr", + "html", + "i", + "iframe", + "img", + "input", + "ins", + "kbd", + "keygen", + "label", + "legend", + "li", + "link", + "map", + "mark", + "menu", + "meta", + "meter", + "nav", + "noscript", + "object", + "ol", + "optgroup", + "option", + "output", + "p", + "param", + "pre", + "progress", + "q", + "rp", + "rt", + "ruby", + "s", + "samp", + "script", + "section", + "select", + "small", + "source", + "span", + "strong", + "style", + "sub", + "summary", + "sup", + "table", + "tbody", + "td", + "textarea", + "tfoot", + "th", + "thead", + "time", + "title", + "tr", + "track", + "u", + "ul", + "var", + "video", + "wbr", +} + +var attributes = []string{ + "accept", + "accept-charset", + "accesskey", + "action", + "alt", + "async", + "autocomplete", + "autofocus", + "autoplay", + "border", + "challenge", + "charset", + "checked", + "cite", + "class", + "cols", + "colspan", + "command", + "content", + "contenteditable", + "contextmenu", + "controls", + "coords", + "crossorigin", + "data", + "datetime", + "default", + "defer", + "dir", + "dirname", + "disabled", + "download", + "draggable", + "dropzone", + "enctype", + "for", + "form", + "formaction", + "formenctype", + "formmethod", + "formnovalidate", + "formtarget", + "headers", + "height", + "hidden", + "high", + "href", + "hreflang", + "http-equiv", + "icon", + "id", + "inert", + "ismap", + "itemid", + "itemprop", + "itemref", + "itemscope", + "itemtype", + "keytype", + "kind", + "label", + "lang", + "list", + "loop", + "low", + "manifest", + "max", + "maxlength", + "media", + "mediagroup", + "method", + "min", + "multiple", + "muted", + "name", + "novalidate", + "open", + "optimum", + "pattern", + "ping", + "placeholder", + "poster", + "preload", + "radiogroup", + "readonly", + "rel", + "required", + "reversed", + "rows", + "rowspan", + "sandbox", + "spellcheck", + "scope", + "scoped", + "seamless", + "selected", + "shape", + "size", + "sizes", + "span", + "src", + "srcdoc", + "srclang", + "start", + "step", + "style", + "tabindex", + "target", + "title", + "translate", + "type", + "typemustmatch", + "usemap", + "value", + "width", + "wrap", +} + +var eventHandlers = []string{ + "onabort", + "onafterprint", + "onbeforeprint", + "onbeforeunload", + "onblur", + "oncancel", + "oncanplay", + "oncanplaythrough", + "onchange", + "onclick", + "onclose", + "oncontextmenu", + "oncuechange", + "ondblclick", + "ondrag", + "ondragend", + "ondragenter", + "ondragleave", + "ondragover", + "ondragstart", + "ondrop", + "ondurationchange", + "onemptied", + "onended", + "onerror", + "onfocus", + "onhashchange", + "oninput", + "oninvalid", + "onkeydown", + "onkeypress", + "onkeyup", + "onload", + "onloadeddata", + "onloadedmetadata", + "onloadstart", + "onmessage", + "onmousedown", + "onmousemove", + "onmouseout", + "onmouseover", + "onmouseup", + "onmousewheel", + "onoffline", + "ononline", + "onpagehide", + "onpageshow", + "onpause", + "onplay", + "onplaying", + "onpopstate", + "onprogress", + "onratechange", + "onreset", + "onresize", + "onscroll", + "onseeked", + "onseeking", + "onselect", + "onshow", + "onstalled", + "onstorage", + "onsubmit", + "onsuspend", + "ontimeupdate", + "onunload", + "onvolumechange", + "onwaiting", +} + +// extra are ad-hoc values not covered by any of the lists above. +var extra = []string{ + "align", + "annotation", + "applet", + "center", + "color", + "font", + "frame", + "frameset", + "nobr", +} diff --git a/src/pkg/exp/html/atom/table.go b/src/pkg/exp/html/atom/table.go new file mode 100644 index 0000000000..8300cd21f6 --- /dev/null +++ b/src/pkg/exp/html/atom/table.go @@ -0,0 +1,629 @@ +package atom + +const ( + A Atom = 1 + Abbr Atom = 2 + Accept Atom = 3 + AcceptCharset Atom = 4 + Accesskey Atom = 5 + Action Atom = 6 + Address Atom = 7 + Align Atom = 8 + Alt Atom = 9 + Annotation Atom = 10 + Applet Atom = 11 + Area Atom = 12 + Article Atom = 13 + Aside Atom = 14 + Async Atom = 15 + Audio Atom = 16 + Autocomplete Atom = 17 + Autofocus Atom = 18 + Autoplay Atom = 19 + B Atom = 20 + Base Atom = 21 + Bdi Atom = 22 + Bdo Atom = 23 + Blockquote Atom = 24 + Body Atom = 25 + Border Atom = 26 + Br Atom = 27 + Button Atom = 28 + Canvas Atom = 29 + Caption Atom = 30 + Center Atom = 31 + Challenge Atom = 32 + Charset Atom = 33 + Checked Atom = 34 + Cite Atom = 35 + Class Atom = 36 + Code Atom = 37 + Col Atom = 38 + Colgroup Atom = 39 + Color Atom = 40 + Cols Atom = 41 + Colspan Atom = 42 + Command Atom = 43 + Content Atom = 44 + Contenteditable Atom = 45 + Contextmenu Atom = 46 + Controls Atom = 47 + Coords Atom = 48 + Crossorigin Atom = 49 + Data Atom = 50 + Datalist Atom = 51 + Datetime Atom = 52 + Dd Atom = 53 + Default Atom = 54 + Defer Atom = 55 + Del Atom = 56 + Details Atom = 57 + Dfn Atom = 58 + Dialog Atom = 59 + Dir Atom = 60 + Dirname Atom = 61 + Disabled Atom = 62 + Div Atom = 63 + Dl Atom = 64 + Download Atom = 65 + Draggable Atom = 66 + Dropzone Atom = 67 + Dt Atom = 68 + Em Atom = 69 + Embed Atom = 70 + Enctype Atom = 71 + Fieldset Atom = 72 + Figcaption Atom = 73 + Figure Atom = 74 + Font Atom = 75 + Footer Atom = 76 + For Atom = 77 + Form Atom = 78 + Formaction Atom = 79 + Formenctype Atom = 80 + Formmethod Atom = 81 + Formnovalidate Atom = 82 + Formtarget Atom = 83 + Frame Atom = 84 + Frameset Atom = 85 + H1 Atom = 86 + H2 Atom = 87 + H3 Atom = 88 + H4 Atom = 89 + H5 Atom = 90 + H6 Atom = 91 + Head Atom = 92 + Header Atom = 93 + Headers Atom = 94 + Height Atom = 95 + Hgroup Atom = 96 + Hidden Atom = 97 + High Atom = 98 + Hr Atom = 99 + Href Atom = 100 + Hreflang Atom = 101 + Html Atom = 102 + HttpEquiv Atom = 103 + I Atom = 104 + Icon Atom = 105 + Id Atom = 106 + Iframe Atom = 107 + Img Atom = 108 + Inert Atom = 109 + Input Atom = 110 + Ins Atom = 111 + Ismap Atom = 112 + Itemid Atom = 113 + Itemprop Atom = 114 + Itemref Atom = 115 + Itemscope Atom = 116 + Itemtype Atom = 117 + Kbd Atom = 118 + Keygen Atom = 119 + Keytype Atom = 120 + Kind Atom = 121 + Label Atom = 122 + Lang Atom = 123 + Legend Atom = 124 + Li Atom = 125 + Link Atom = 126 + List Atom = 127 + Loop Atom = 128 + Low Atom = 129 + Manifest Atom = 130 + Map Atom = 131 + Mark Atom = 132 + Max Atom = 133 + Maxlength Atom = 134 + Media Atom = 135 + Mediagroup Atom = 136 + Menu Atom = 137 + Meta Atom = 138 + Meter Atom = 139 + Method Atom = 140 + Min Atom = 141 + Multiple Atom = 142 + Muted Atom = 143 + Name Atom = 144 + Nav Atom = 145 + Nobr Atom = 146 + Noscript Atom = 147 + Novalidate Atom = 148 + Object Atom = 149 + Ol Atom = 150 + Onabort Atom = 151 + Onafterprint Atom = 152 + Onbeforeprint Atom = 153 + Onbeforeunload Atom = 154 + Onblur Atom = 155 + Oncancel Atom = 156 + Oncanplay Atom = 157 + Oncanplaythrough Atom = 158 + Onchange Atom = 159 + Onclick Atom = 160 + Onclose Atom = 161 + Oncontextmenu Atom = 162 + Oncuechange Atom = 163 + Ondblclick Atom = 164 + Ondrag Atom = 165 + Ondragend Atom = 166 + Ondragenter Atom = 167 + Ondragleave Atom = 168 + Ondragover Atom = 169 + Ondragstart Atom = 170 + Ondrop Atom = 171 + Ondurationchange Atom = 172 + Onemptied Atom = 173 + Onended Atom = 174 + Onerror Atom = 175 + Onfocus Atom = 176 + Onhashchange Atom = 177 + Oninput Atom = 178 + Oninvalid Atom = 179 + Onkeydown Atom = 180 + Onkeypress Atom = 181 + Onkeyup Atom = 182 + Onload Atom = 183 + Onloadeddata Atom = 184 + Onloadedmetadata Atom = 185 + Onloadstart Atom = 186 + Onmessage Atom = 187 + Onmousedown Atom = 188 + Onmousemove Atom = 189 + Onmouseout Atom = 190 + Onmouseover Atom = 191 + Onmouseup Atom = 192 + Onmousewheel Atom = 193 + Onoffline Atom = 194 + Ononline Atom = 195 + Onpagehide Atom = 196 + Onpageshow Atom = 197 + Onpause Atom = 198 + Onplay Atom = 199 + Onplaying Atom = 200 + Onpopstate Atom = 201 + Onprogress Atom = 202 + Onratechange Atom = 203 + Onreset Atom = 204 + Onresize Atom = 205 + Onscroll Atom = 206 + Onseeked Atom = 207 + Onseeking Atom = 208 + Onselect Atom = 209 + Onshow Atom = 210 + Onstalled Atom = 211 + Onstorage Atom = 212 + Onsubmit Atom = 213 + Onsuspend Atom = 214 + Ontimeupdate Atom = 215 + Onunload Atom = 216 + Onvolumechange Atom = 217 + Onwaiting Atom = 218 + Open Atom = 219 + Optgroup Atom = 220 + Optimum Atom = 221 + Option Atom = 222 + Output Atom = 223 + P Atom = 224 + Param Atom = 225 + Pattern Atom = 226 + Ping Atom = 227 + Placeholder Atom = 228 + Poster Atom = 229 + Pre Atom = 230 + Preload Atom = 231 + Progress Atom = 232 + Q Atom = 233 + Radiogroup Atom = 234 + Readonly Atom = 235 + Rel Atom = 236 + Required Atom = 237 + Reversed Atom = 238 + Rows Atom = 239 + Rowspan Atom = 240 + Rp Atom = 241 + Rt Atom = 242 + Ruby Atom = 243 + S Atom = 244 + Samp Atom = 245 + Sandbox Atom = 246 + Scope Atom = 247 + Scoped Atom = 248 + Script Atom = 249 + Seamless Atom = 250 + Section Atom = 251 + Select Atom = 252 + Selected Atom = 253 + Shape Atom = 254 + Size Atom = 255 + Sizes Atom = 256 + Small Atom = 257 + Source Atom = 258 + Span Atom = 259 + Spellcheck Atom = 260 + Src Atom = 261 + Srcdoc Atom = 262 + Srclang Atom = 263 + Start Atom = 264 + Step Atom = 265 + Strong Atom = 266 + Style Atom = 267 + Sub Atom = 268 + Summary Atom = 269 + Sup Atom = 270 + Tabindex Atom = 271 + Table Atom = 272 + Target Atom = 273 + Tbody Atom = 274 + Td Atom = 275 + Textarea Atom = 276 + Tfoot Atom = 277 + Th Atom = 278 + Thead Atom = 279 + Time Atom = 280 + Title Atom = 281 + Tr Atom = 282 + Track Atom = 283 + Translate Atom = 284 + Type Atom = 285 + Typemustmatch Atom = 286 + U Atom = 287 + Ul Atom = 288 + Usemap Atom = 289 + Value Atom = 290 + Var Atom = 291 + Video Atom = 292 + Wbr Atom = 293 + Width Atom = 294 + Wrap Atom = 295 +) + +const max Atom = 295 + +var table = []string{ + "", + "a", + "abbr", + "accept", + "accept-charset", + "accesskey", + "action", + "address", + "align", + "alt", + "annotation", + "applet", + "area", + "article", + "aside", + "async", + "audio", + "autocomplete", + "autofocus", + "autoplay", + "b", + "base", + "bdi", + "bdo", + "blockquote", + "body", + "border", + "br", + "button", + "canvas", + "caption", + "center", + "challenge", + "charset", + "checked", + "cite", + "class", + "code", + "col", + "colgroup", + "color", + "cols", + "colspan", + "command", + "content", + "contenteditable", + "contextmenu", + "controls", + "coords", + "crossorigin", + "data", + "datalist", + "datetime", + "dd", + "default", + "defer", + "del", + "details", + "dfn", + "dialog", + "dir", + "dirname", + "disabled", + "div", + "dl", + "download", + "draggable", + "dropzone", + "dt", + "em", + "embed", + "enctype", + "fieldset", + "figcaption", + "figure", + "font", + "footer", + "for", + "form", + "formaction", + "formenctype", + "formmethod", + "formnovalidate", + "formtarget", + "frame", + "frameset", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "head", + "header", + "headers", + "height", + "hgroup", + "hidden", + "high", + "hr", + "href", + "hreflang", + "html", + "http-equiv", + "i", + "icon", + "id", + "iframe", + "img", + "inert", + "input", + "ins", + "ismap", + "itemid", + "itemprop", + "itemref", + "itemscope", + "itemtype", + "kbd", + "keygen", + "keytype", + "kind", + "label", + "lang", + "legend", + "li", + "link", + "list", + "loop", + "low", + "manifest", + "map", + "mark", + "max", + "maxlength", + "media", + "mediagroup", + "menu", + "meta", + "meter", + "method", + "min", + "multiple", + "muted", + "name", + "nav", + "nobr", + "noscript", + "novalidate", + "object", + "ol", + "onabort", + "onafterprint", + "onbeforeprint", + "onbeforeunload", + "onblur", + "oncancel", + "oncanplay", + "oncanplaythrough", + "onchange", + "onclick", + "onclose", + "oncontextmenu", + "oncuechange", + "ondblclick", + "ondrag", + "ondragend", + "ondragenter", + "ondragleave", + "ondragover", + "ondragstart", + "ondrop", + "ondurationchange", + "onemptied", + "onended", + "onerror", + "onfocus", + "onhashchange", + "oninput", + "oninvalid", + "onkeydown", + "onkeypress", + "onkeyup", + "onload", + "onloadeddata", + "onloadedmetadata", + "onloadstart", + "onmessage", + "onmousedown", + "onmousemove", + "onmouseout", + "onmouseover", + "onmouseup", + "onmousewheel", + "onoffline", + "ononline", + "onpagehide", + "onpageshow", + "onpause", + "onplay", + "onplaying", + "onpopstate", + "onprogress", + "onratechange", + "onreset", + "onresize", + "onscroll", + "onseeked", + "onseeking", + "onselect", + "onshow", + "onstalled", + "onstorage", + "onsubmit", + "onsuspend", + "ontimeupdate", + "onunload", + "onvolumechange", + "onwaiting", + "open", + "optgroup", + "optimum", + "option", + "output", + "p", + "param", + "pattern", + "ping", + "placeholder", + "poster", + "pre", + "preload", + "progress", + "q", + "radiogroup", + "readonly", + "rel", + "required", + "reversed", + "rows", + "rowspan", + "rp", + "rt", + "ruby", + "s", + "samp", + "sandbox", + "scope", + "scoped", + "script", + "seamless", + "section", + "select", + "selected", + "shape", + "size", + "sizes", + "small", + "source", + "span", + "spellcheck", + "src", + "srcdoc", + "srclang", + "start", + "step", + "strong", + "style", + "sub", + "summary", + "sup", + "tabindex", + "table", + "target", + "tbody", + "td", + "textarea", + "tfoot", + "th", + "thead", + "time", + "title", + "tr", + "track", + "translate", + "type", + "typemustmatch", + "u", + "ul", + "usemap", + "value", + "var", + "video", + "wbr", + "width", + "wrap", +} + +var oneByteAtoms = [26]Atom{ + A, + B, + 0, + 0, + 0, + 0, + 0, + 0, + I, + 0, + 0, + 0, + 0, + 0, + 0, + P, + Q, + 0, + S, + 0, + U, + 0, + 0, + 0, + 0, + 0, +} diff --git a/src/pkg/exp/html/token.go b/src/pkg/exp/html/token.go index c9ab6e0761..632ba8d2f2 100644 --- a/src/pkg/exp/html/token.go +++ b/src/pkg/exp/html/token.go @@ -6,6 +6,7 @@ package html import ( "bytes" + "exp/html/atom" "io" "strconv" "strings" @@ -791,13 +792,13 @@ func (z *Tokenizer) Token() Token { for moreAttr { var key, val []byte key, val, moreAttr = z.TagAttr() - attr = append(attr, Attribute{"", string(key), string(val)}) + attr = append(attr, Attribute{"", atom.String(key), string(val)}) } - t.Data = string(name) + t.Data = atom.String(name) t.Attr = attr case EndTagToken: name, _ := z.TagName() - t.Data = string(name) + t.Data = atom.String(name) } return t }