]> Cypherpunks repositories - gostls13.git/commitdiff
exp/html/atom: faster, hash-based lookup.
authorNigel Tao <nigeltao@golang.org>
Thu, 31 May 2012 23:36:05 +0000 (09:36 +1000)
committerNigel Tao <nigeltao@golang.org>
Thu, 31 May 2012 23:36:05 +0000 (09:36 +1000)
exp/html/atom benchmark:
benchmark          old ns/op    new ns/op    delta
BenchmarkLookup       199226        80770  -59.46%

exp/html benchmark:
benchmark                      old ns/op    new ns/op    delta
BenchmarkParser                  4864890      4510834   -7.28%
BenchmarkHighLevelTokenizer      2209192      1969684  -10.84%
benchmark                       old MB/s     new MB/s  speedup
BenchmarkParser                    16.07        17.33    1.08x
BenchmarkHighLevelTokenizer        35.38        39.68    1.12x

R=r
CC=golang-dev
https://golang.org/cl/6261054

src/pkg/exp/html/atom/atom.go
src/pkg/exp/html/atom/atom_test.go
src/pkg/exp/html/atom/gen.go
src/pkg/exp/html/atom/table.go

index 1ffde98471a586b4d32dfdbf9db7774ec268b07d..b67428066d030afedd98ae305a63f1e1092abf32 100644 (file)
@@ -6,33 +6,40 @@
 // frequently occurring HTML strings: lower-case tag names and attribute keys
 // such as "p" and "id".
 //
-// Sharing an atom's string representation between all elements with the same
-// tag can result in fewer string allocations when tokenizing and parsing HTML.
-// Integer comparisons are also generally faster than string comparisons.
+// Sharing an atom's name between all elements with the same tag can result in
+// fewer string allocations when tokenizing and parsing HTML. Integer
+// comparisons are also generally faster than string comparisons.
 //
-// An atom's particular code (such as atom.Div == 63) is not guaranteed to
-// stay the same between versions of this package. Neither is any ordering
-// guaranteed: whether atom.H1 < atom.H2 may also change. The codes are not
-// guaranteed to be dense. The only guarantees are that e.g. looking up "div"
-// will yield atom.Div, calling atom.Div.String will return "div", and
-// atom.Div != 0.
+// The value of an atom's particular code is not guaranteed to stay the same
+// between versions of this package. Neither is any ordering guaranteed:
+// whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to
+// be dense. The only guarantees are that e.g. looking up "div" will yield
+// atom.Div, calling atom.Div.String will return "div", and atom.Div != 0.
 package atom
 
+// The hash function must be the same as the one used in gen.go
+func hash(s []byte) (h uint32) {
+       for i := 0; i < len(s); i++ {
+               h = h<<5 ^ h>>27 ^ uint32(s[i])
+       }
+       return h
+}
+
 // Atom is an integer code for a string. The zero value maps to "".
 type Atom int
 
-// String returns the atom's string representation.
+// String returns the atom's name.
 func (a Atom) String() string {
-       if a <= 0 || a > max {
-               return ""
+       if 0 <= a && a < Atom(len(table)) {
+               return table[a]
        }
-       return table[a]
+       return ""
 }
 
 // Lookup returns the atom whose name is s. It returns zero if there is no
 // such atom.
 func Lookup(s []byte) Atom {
-       if len(s) == 0 {
+       if len(s) == 0 || len(s) > maxLen {
                return 0
        }
        if len(s) == 1 {
@@ -42,15 +49,25 @@ func Lookup(s []byte) Atom {
                }
                return oneByteAtoms[x-'a']
        }
-       // Binary search for the atom. Unlike sort.Search, this returns early on an exact match.
-       // TODO: this could be optimized further. For example, lo and hi could be initialized
-       // from s[0]. Separately, all the "onxxx" atoms could be moved into their own table.
-       lo, hi := Atom(1), 1+max
+       hs := hash(s)
+       // Binary search for hs. Unlike sort.Search, this returns early on an exact match.
+       // A loop invariant is that len(table[i]) == len(s) for all i in [lo, hi).
+       lo := Atom(loHi[len(s)])
+       hi := Atom(loHi[len(s)+1])
        for lo < hi {
                mid := (lo + hi) / 2
-               if cmp := compare(s, table[mid]); cmp == 0 {
+               if ht := hashes[mid]; hs == ht {
+                       // The gen.go program ensures that each atom's name has a distinct hash.
+                       // However, arbitrary strings may collide with the atom's name. We have
+                       // to check that string(s) == table[mid].
+                       t := table[mid]
+                       for i, si := range s {
+                               if si != t[i] {
+                                       return 0
+                               }
+                       }
                        return mid
-               } else if cmp > 0 {
+               } else if hs > ht {
                        lo = mid + 1
                } else {
                        hi = mid
@@ -67,22 +84,3 @@ func String(s []byte) string {
        }
        return string(s)
 }
-
-// compare is like bytes.Compare, except that it takes one []byte argument and
-// one string argument, and returns negative/0/positive instead of -1/0/+1.
-func compare(s []byte, t string) int {
-       n := len(s)
-       if n > len(t) {
-               n = len(t)
-       }
-       for i, si := range s[:n] {
-               ti := t[i]
-               switch {
-               case si > ti:
-                       return +1
-               case si < ti:
-                       return -1
-               }
-       }
-       return len(s) - len(t)
-}
index e4940865d0aaf11d891c5efee66394e96e9538b6..9b0726899bff685852ebfc12ea7bda5340e7cebb 100644 (file)
@@ -5,6 +5,7 @@
 package atom
 
 import (
+       "sort"
        "testing"
 )
 
@@ -42,6 +43,8 @@ func TestMisses(t *testing.T) {
                "h7",
                "onClick",
                "λ",
+               // The following string has the same hash (0xa1d7fab7) as "onmouseover".
+               "\x00\x00\x00\x00\x00\x50\x18\xae\x38\xd0\xb7",
        }
        for _, tc := range testCases {
                got := Lookup([]byte(tc))
@@ -50,3 +53,21 @@ func TestMisses(t *testing.T) {
                }
        }
 }
+
+func BenchmarkLookup(b *testing.B) {
+       sortedTable := make([]string, len(table))
+       copy(sortedTable, table[:])
+       sort.Strings(sortedTable)
+
+       x := make([][]byte, 1000)
+       for i := range x {
+               x[i] = []byte(sortedTable[i%len(sortedTable)])
+       }
+
+       b.ResetTimer()
+       for i := 0; i < b.N; i++ {
+               for _, s := range x {
+                       Lookup(s)
+               }
+       }
+}
index 176c26ec3dac54e8e8f5c52338dce09ba30852ff..fc4407e0f811c584fc7830f0f667a44b5a5486db 100644 (file)
@@ -13,9 +13,30 @@ package main
 
 import (
        "fmt"
+       "os"
        "sort"
 )
 
+// The hash function must be the same as the one used in atom.go
+func hash(s string) (h uint32) {
+       for i := 0; i < len(s); i++ {
+               h = h<<5 ^ h>>27 ^ uint32(s[i])
+       }
+       return h
+}
+
+// lhash returns a uint64 whose high 32 bits are len(s) and whose low 32 bits
+// are hash(s).
+func lhash(s string) uint64 {
+       return uint64(len(s))<<32 | uint64(hash(s))
+}
+
+type byLhash []string
+
+func (b byLhash) Len() int           { return len(b) }
+func (b byLhash) Less(i, j int) bool { return lhash(b[i]) < lhash(b[j]) }
+func (b byLhash) Swap(i, j int)      { b[i], b[j] = b[j], b[i] }
+
 // identifier converts s to a Go exported identifier.
 // It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
 func identifier(s string) string {
@@ -36,43 +57,84 @@ func identifier(s string) string {
 }
 
 func main() {
-       m := map[string]bool{
+       // Construct a list of atoms, sorted by their lhash.
+       m0 := map[string]bool{
                "": true,
        }
        for _, list := range [][]string{elements, attributes, eventHandlers, extra} {
                for _, s := range list {
-                       m[s] = true
+                       m0[s] = true
                }
        }
-       atoms := make([]string, 0, len(m))
-       for s := range m {
+       atoms := make([]string, 0, len(m0))
+       for s := range m0 {
                atoms = append(atoms, s)
        }
-       sort.Strings(atoms)
+       sort.Sort(byLhash(atoms))
 
+       // Calculate the magic constants to output as table.go.
        byInt := []string{}
        byStr := map[string]int{}
        ident := []string{}
+       lhashes := []uint64{}
+       maxLen := 0
        for i, s := range atoms {
                byInt = append(byInt, s)
                byStr[s] = i
                ident = append(ident, identifier(s))
+               lhashes = append(lhashes, lhash(s))
+               if maxLen < len(s) {
+                       maxLen = len(s)
+               }
+       }
+
+       // Check for hash collisions.
+       m1 := map[uint64]int{}
+       for i, h := range lhashes {
+               h &= 1<<32 - 1
+               if j, ok := m1[h]; ok {
+                       fmt.Fprintf(os.Stderr, "hash collision at 0x%08x: %q, %q\n", h, byInt[i], byInt[j])
+                       os.Exit(1)
+               }
+               m1[h] = i
        }
 
+       // Generate the Go code.
        fmt.Printf("package atom\n\nconst (\n")
-       for i, _ := range byInt {
-               if i == 0 {
-                       continue
+       {
+               // Print the Atoms in alphabetical order.
+               lines := []string{}
+               for i, _ := range byInt {
+                       if i == 0 {
+                               continue
+                       }
+                       lines = append(lines, fmt.Sprintf("\t%s Atom = %d", ident[i], i))
                }
-               fmt.Printf("\t%s Atom = %d\n", ident[i], i)
+               sort.Strings(lines)
+               for _, line := range lines {
+                       fmt.Println(line)
+               }
+               fmt.Printf(")\n\n")
        }
-       fmt.Printf(")\n\n")
-       fmt.Printf("const max Atom = %d\n\n", len(byInt)-1)
-       fmt.Printf("var table = []string{\n")
+       fmt.Printf("const maxLen = %d\n\n", maxLen)
+       fmt.Printf("var table = [...]string{\n")
        for _, s := range byInt {
                fmt.Printf("\t%q,\n", s)
        }
        fmt.Printf("}\n\n")
+       fmt.Printf("var hashes = [...]uint32{\n")
+       for _, s := range byInt {
+               fmt.Printf("\t0x%08x,\n", hash(s))
+       }
+       fmt.Printf("}\n\n")
+       fmt.Printf("var loHi = [maxLen + 2]uint16{\n")
+       for n := 0; n <= maxLen; n++ {
+               fmt.Printf("\t%d,\n", sort.Search(len(byInt), func(i int) bool {
+                       return int(lhashes[i]>>32) >= n
+               }))
+       }
+       fmt.Printf("\t%d,\n", len(byInt))
+       fmt.Printf("}\n\n")
        fmt.Printf("var oneByteAtoms = [26]Atom{\n")
        for i := 'a'; i <= 'z'; i++ {
                val := "0"
index 8300cd21f6091dbadfa0ae0a5fbfd1a37ffcaf6a..27cd7d18b44eebaadfa97d244cb71ef05819db3e 100644 (file)
@@ -2,601 +2,921 @@ package atom
 
 const (
        A                Atom = 1
-       Abbr             Atom = 2
-       Accept           Atom = 3
-       AcceptCharset    Atom = 4
-       Accesskey        Atom = 5
-       Action           Atom = 6
-       Address          Atom = 7
-       Align            Atom = 8
-       Alt              Atom = 9
-       Annotation       Atom = 10
-       Applet           Atom = 11
-       Area             Atom = 12
-       Article          Atom = 13
-       Aside            Atom = 14
-       Async            Atom = 15
-       Audio            Atom = 16
-       Autocomplete     Atom = 17
-       Autofocus        Atom = 18
-       Autoplay         Atom = 19
-       B                Atom = 20
-       Base             Atom = 21
-       Bdi              Atom = 22
-       Bdo              Atom = 23
-       Blockquote       Atom = 24
-       Body             Atom = 25
-       Border           Atom = 26
-       Br               Atom = 27
-       Button           Atom = 28
-       Canvas           Atom = 29
-       Caption          Atom = 30
-       Center           Atom = 31
-       Challenge        Atom = 32
-       Charset          Atom = 33
-       Checked          Atom = 34
-       Cite             Atom = 35
-       Class            Atom = 36
-       Code             Atom = 37
-       Col              Atom = 38
-       Colgroup         Atom = 39
-       Color            Atom = 40
-       Cols             Atom = 41
-       Colspan          Atom = 42
-       Command          Atom = 43
-       Content          Atom = 44
-       Contenteditable  Atom = 45
-       Contextmenu      Atom = 46
-       Controls         Atom = 47
-       Coords           Atom = 48
-       Crossorigin      Atom = 49
-       Data             Atom = 50
-       Datalist         Atom = 51
-       Datetime         Atom = 52
-       Dd               Atom = 53
-       Default          Atom = 54
-       Defer            Atom = 55
-       Del              Atom = 56
-       Details          Atom = 57
-       Dfn              Atom = 58
-       Dialog           Atom = 59
-       Dir              Atom = 60
-       Dirname          Atom = 61
-       Disabled         Atom = 62
-       Div              Atom = 63
-       Dl               Atom = 64
-       Download         Atom = 65
-       Draggable        Atom = 66
-       Dropzone         Atom = 67
-       Dt               Atom = 68
-       Em               Atom = 69
-       Embed            Atom = 70
-       Enctype          Atom = 71
-       Fieldset         Atom = 72
-       Figcaption       Atom = 73
-       Figure           Atom = 74
-       Font             Atom = 75
-       Footer           Atom = 76
-       For              Atom = 77
-       Form             Atom = 78
-       Formaction       Atom = 79
-       Formenctype      Atom = 80
-       Formmethod       Atom = 81
-       Formnovalidate   Atom = 82
-       Formtarget       Atom = 83
-       Frame            Atom = 84
-       Frameset         Atom = 85
-       H1               Atom = 86
-       H2               Atom = 87
-       H3               Atom = 88
-       H4               Atom = 89
-       H5               Atom = 90
-       H6               Atom = 91
-       Head             Atom = 92
-       Header           Atom = 93
-       Headers          Atom = 94
-       Height           Atom = 95
-       Hgroup           Atom = 96
-       Hidden           Atom = 97
-       High             Atom = 98
-       Hr               Atom = 99
-       Href             Atom = 100
-       Hreflang         Atom = 101
-       Html             Atom = 102
-       HttpEquiv        Atom = 103
-       I                Atom = 104
-       Icon             Atom = 105
-       Id               Atom = 106
-       Iframe           Atom = 107
-       Img              Atom = 108
-       Inert            Atom = 109
-       Input            Atom = 110
-       Ins              Atom = 111
-       Ismap            Atom = 112
-       Itemid           Atom = 113
-       Itemprop         Atom = 114
-       Itemref          Atom = 115
-       Itemscope        Atom = 116
-       Itemtype         Atom = 117
-       Kbd              Atom = 118
-       Keygen           Atom = 119
-       Keytype          Atom = 120
-       Kind             Atom = 121
-       Label            Atom = 122
-       Lang             Atom = 123
-       Legend           Atom = 124
-       Li               Atom = 125
-       Link             Atom = 126
-       List             Atom = 127
-       Loop             Atom = 128
-       Low              Atom = 129
-       Manifest         Atom = 130
-       Map              Atom = 131
-       Mark             Atom = 132
-       Max              Atom = 133
-       Maxlength        Atom = 134
-       Media            Atom = 135
-       Mediagroup       Atom = 136
-       Menu             Atom = 137
-       Meta             Atom = 138
-       Meter            Atom = 139
-       Method           Atom = 140
-       Min              Atom = 141
-       Multiple         Atom = 142
-       Muted            Atom = 143
-       Name             Atom = 144
-       Nav              Atom = 145
-       Nobr             Atom = 146
-       Noscript         Atom = 147
-       Novalidate       Atom = 148
-       Object           Atom = 149
-       Ol               Atom = 150
-       Onabort          Atom = 151
-       Onafterprint     Atom = 152
-       Onbeforeprint    Atom = 153
-       Onbeforeunload   Atom = 154
-       Onblur           Atom = 155
-       Oncancel         Atom = 156
-       Oncanplay        Atom = 157
-       Oncanplaythrough Atom = 158
-       Onchange         Atom = 159
-       Onclick          Atom = 160
-       Onclose          Atom = 161
-       Oncontextmenu    Atom = 162
-       Oncuechange      Atom = 163
-       Ondblclick       Atom = 164
-       Ondrag           Atom = 165
-       Ondragend        Atom = 166
-       Ondragenter      Atom = 167
-       Ondragleave      Atom = 168
-       Ondragover       Atom = 169
-       Ondragstart      Atom = 170
-       Ondrop           Atom = 171
-       Ondurationchange Atom = 172
-       Onemptied        Atom = 173
-       Onended          Atom = 174
-       Onerror          Atom = 175
-       Onfocus          Atom = 176
-       Onhashchange     Atom = 177
-       Oninput          Atom = 178
-       Oninvalid        Atom = 179
-       Onkeydown        Atom = 180
-       Onkeypress       Atom = 181
-       Onkeyup          Atom = 182
-       Onload           Atom = 183
-       Onloadeddata     Atom = 184
-       Onloadedmetadata Atom = 185
-       Onloadstart      Atom = 186
-       Onmessage        Atom = 187
-       Onmousedown      Atom = 188
-       Onmousemove      Atom = 189
-       Onmouseout       Atom = 190
-       Onmouseover      Atom = 191
-       Onmouseup        Atom = 192
-       Onmousewheel     Atom = 193
-       Onoffline        Atom = 194
-       Ononline         Atom = 195
-       Onpagehide       Atom = 196
-       Onpageshow       Atom = 197
-       Onpause          Atom = 198
-       Onplay           Atom = 199
-       Onplaying        Atom = 200
-       Onpopstate       Atom = 201
-       Onprogress       Atom = 202
-       Onratechange     Atom = 203
-       Onreset          Atom = 204
-       Onresize         Atom = 205
-       Onscroll         Atom = 206
-       Onseeked         Atom = 207
-       Onseeking        Atom = 208
-       Onselect         Atom = 209
-       Onshow           Atom = 210
-       Onstalled        Atom = 211
-       Onstorage        Atom = 212
-       Onsubmit         Atom = 213
-       Onsuspend        Atom = 214
-       Ontimeupdate     Atom = 215
-       Onunload         Atom = 216
-       Onvolumechange   Atom = 217
-       Onwaiting        Atom = 218
-       Open             Atom = 219
-       Optgroup         Atom = 220
-       Optimum          Atom = 221
-       Option           Atom = 222
-       Output           Atom = 223
-       P                Atom = 224
-       Param            Atom = 225
-       Pattern          Atom = 226
-       Ping             Atom = 227
-       Placeholder      Atom = 228
-       Poster           Atom = 229
-       Pre              Atom = 230
-       Preload          Atom = 231
-       Progress         Atom = 232
-       Q                Atom = 233
-       Radiogroup       Atom = 234
-       Readonly         Atom = 235
-       Rel              Atom = 236
-       Required         Atom = 237
-       Reversed         Atom = 238
-       Rows             Atom = 239
-       Rowspan          Atom = 240
-       Rp               Atom = 241
-       Rt               Atom = 242
-       Ruby             Atom = 243
-       S                Atom = 244
-       Samp             Atom = 245
-       Sandbox          Atom = 246
-       Scope            Atom = 247
-       Scoped           Atom = 248
-       Script           Atom = 249
-       Seamless         Atom = 250
-       Section          Atom = 251
-       Select           Atom = 252
-       Selected         Atom = 253
-       Shape            Atom = 254
-       Size             Atom = 255
-       Sizes            Atom = 256
-       Small            Atom = 257
-       Source           Atom = 258
-       Span             Atom = 259
-       Spellcheck       Atom = 260
-       Src              Atom = 261
-       Srcdoc           Atom = 262
-       Srclang          Atom = 263
-       Start            Atom = 264
-       Step             Atom = 265
-       Strong           Atom = 266
-       Style            Atom = 267
-       Sub              Atom = 268
-       Summary          Atom = 269
-       Sup              Atom = 270
-       Tabindex         Atom = 271
-       Table            Atom = 272
-       Target           Atom = 273
-       Tbody            Atom = 274
-       Td               Atom = 275
-       Textarea         Atom = 276
-       Tfoot            Atom = 277
-       Th               Atom = 278
-       Thead            Atom = 279
-       Time             Atom = 280
-       Title            Atom = 281
-       Tr               Atom = 282
-       Track            Atom = 283
-       Translate        Atom = 284
-       Type             Atom = 285
-       Typemustmatch    Atom = 286
-       U                Atom = 287
-       Ul               Atom = 288
-       Usemap           Atom = 289
-       Value            Atom = 290
-       Var              Atom = 291
-       Video            Atom = 292
-       Wbr              Atom = 293
-       Width            Atom = 294
-       Wrap             Atom = 295
+       Abbr             Atom = 58
+       Accept           Atom = 126
+       AcceptCharset    Atom = 288
+       Accesskey        Atom = 230
+       Action           Atom = 127
+       Address          Atom = 182
+       Align            Atom = 91
+       Alt              Atom = 32
+       Annotation       Atom = 251
+       Applet           Atom = 128
+       Area             Atom = 59
+       Article          Atom = 184
+       Aside            Atom = 92
+       Async            Atom = 93
+       Audio            Atom = 94
+       Autocomplete     Atom = 278
+       Autofocus        Atom = 243
+       Autoplay         Atom = 221
+       B                Atom = 2
+       Base             Atom = 56
+       Bdi              Atom = 30
+       Bdo              Atom = 31
+       Blockquote       Atom = 248
+       Body             Atom = 57
+       Border           Atom = 124
+       Br               Atom = 8
+       Button           Atom = 125
+       Canvas           Atom = 121
+       Caption          Atom = 160
+       Center           Atom = 122
+       Challenge        Atom = 242
+       Charset          Atom = 163
+       Checked          Atom = 164
+       Cite             Atom = 53
+       Class            Atom = 90
+       Code             Atom = 54
+       Col              Atom = 29
+       Colgroup         Atom = 193
+       Color            Atom = 89
+       Cols             Atom = 55
+       Colspan          Atom = 167
+       Command          Atom = 166
+       Content          Atom = 165
+       Contenteditable  Atom = 292
+       Contextmenu      Atom = 277
+       Controls         Atom = 192
+       Coords           Atom = 123
+       Crossorigin      Atom = 266
+       Data             Atom = 62
+       Datalist         Atom = 219
+       Datetime         Atom = 220
+       Dd               Atom = 10
+       Default          Atom = 188
+       Defer            Atom = 97
+       Del              Atom = 35
+       Details          Atom = 189
+       Dfn              Atom = 34
+       Dialog           Atom = 131
+       Dir              Atom = 36
+       Dirname          Atom = 190
+       Disabled         Atom = 216
+       Div              Atom = 37
+       Dl               Atom = 11
+       Download         Atom = 195
+       Draggable        Atom = 235
+       Dropzone         Atom = 202
+       Dt               Atom = 12
+       Em               Atom = 9
+       Embed            Atom = 96
+       Enctype          Atom = 183
+       Fieldset         Atom = 212
+       Figcaption       Atom = 247
+       Figure           Atom = 129
+       Font             Atom = 60
+       Footer           Atom = 130
+       For              Atom = 33
+       Form             Atom = 61
+       Formaction       Atom = 256
+       Formenctype      Atom = 273
+       Formmethod       Atom = 261
+       Formnovalidate   Atom = 289
+       Formtarget       Atom = 263
+       Frame            Atom = 95
+       Frameset         Atom = 198
+       H1               Atom = 13
+       H2               Atom = 14
+       H3               Atom = 15
+       H4               Atom = 16
+       H5               Atom = 17
+       H6               Atom = 18
+       Head             Atom = 65
+       Header           Atom = 136
+       Headers          Atom = 187
+       Height           Atom = 137
+       Hgroup           Atom = 135
+       Hidden           Atom = 138
+       High             Atom = 66
+       Hr               Atom = 20
+       Href             Atom = 67
+       Hreflang         Atom = 199
+       Html             Atom = 68
+       HttpEquiv        Atom = 254
+       I                Atom = 3
+       Icon             Atom = 64
+       Id               Atom = 19
+       Iframe           Atom = 133
+       Img              Atom = 40
+       Inert            Atom = 98
+       Input            Atom = 99
+       Ins              Atom = 39
+       Ismap            Atom = 100
+       Itemid           Atom = 134
+       Itemprop         Atom = 223
+       Itemref          Atom = 185
+       Itemscope        Atom = 240
+       Itemtype         Atom = 224
+       Kbd              Atom = 38
+       Keygen           Atom = 132
+       Keytype          Atom = 162
+       Kind             Atom = 63
+       Label            Atom = 104
+       Lang             Atom = 75
+       Legend           Atom = 149
+       Li               Atom = 22
+       Link             Atom = 76
+       List             Atom = 77
+       Loop             Atom = 78
+       Low              Atom = 45
+       Manifest         Atom = 213
+       Map              Atom = 42
+       Mark             Atom = 72
+       Max              Atom = 43
+       Maxlength        Atom = 245
+       Media            Atom = 101
+       Mediagroup       Atom = 257
+       Menu             Atom = 73
+       Meta             Atom = 74
+       Meter            Atom = 102
+       Method           Atom = 148
+       Min              Atom = 44
+       Multiple         Atom = 215
+       Muted            Atom = 103
+       Name             Atom = 70
+       Nav              Atom = 41
+       Nobr             Atom = 71
+       Noscript         Atom = 194
+       Novalidate       Atom = 262
+       Object           Atom = 139
+       Ol               Atom = 21
+       Onabort          Atom = 170
+       Onafterprint     Atom = 280
+       Onbeforeprint    Atom = 287
+       Onbeforeunload   Atom = 291
+       Onblur           Atom = 140
+       Oncancel         Atom = 196
+       Oncanplay        Atom = 227
+       Oncanplaythrough Atom = 295
+       Onchange         Atom = 197
+       Onclick          Atom = 168
+       Onclose          Atom = 169
+       Oncontextmenu    Atom = 286
+       Oncuechange      Atom = 267
+       Ondblclick       Atom = 255
+       Ondrag           Atom = 141
+       Ondragend        Atom = 246
+       Ondragenter      Atom = 270
+       Ondragleave      Atom = 269
+       Ondragover       Atom = 252
+       Ondragstart      Atom = 268
+       Ondrop           Atom = 142
+       Ondurationchange Atom = 293
+       Onemptied        Atom = 241
+       Onended          Atom = 172
+       Onerror          Atom = 173
+       Onfocus          Atom = 171
+       Onhashchange     Atom = 283
+       Oninput          Atom = 175
+       Oninvalid        Atom = 239
+       Onkeydown        Atom = 231
+       Onkeypress       Atom = 264
+       Onkeyup          Atom = 174
+       Onload           Atom = 143
+       Onloadeddata     Atom = 284
+       Onloadedmetadata Atom = 294
+       Onloadstart      Atom = 271
+       Onmessage        Atom = 236
+       Onmousedown      Atom = 274
+       Onmousemove      Atom = 275
+       Onmouseout       Atom = 250
+       Onmouseover      Atom = 276
+       Onmouseup        Atom = 237
+       Onmousewheel     Atom = 279
+       Onoffline        Atom = 228
+       Ononline         Atom = 201
+       Onpagehide       Atom = 259
+       Onpageshow       Atom = 258
+       Onpause          Atom = 177
+       Onplay           Atom = 145
+       Onplaying        Atom = 244
+       Onpopstate       Atom = 249
+       Onprogress       Atom = 253
+       Onratechange     Atom = 282
+       Onreset          Atom = 176
+       Onresize         Atom = 207
+       Onscroll         Atom = 203
+       Onseeked         Atom = 204
+       Onseeking        Atom = 229
+       Onselect         Atom = 205
+       Onshow           Atom = 144
+       Onstalled        Atom = 233
+       Onstorage        Atom = 234
+       Onsubmit         Atom = 206
+       Onsuspend        Atom = 232
+       Ontimeupdate     Atom = 281
+       Onunload         Atom = 208
+       Onvolumechange   Atom = 290
+       Onwaiting        Atom = 226
+       Open             Atom = 69
+       Optgroup         Atom = 225
+       Optimum          Atom = 179
+       Option           Atom = 146
+       Output           Atom = 147
+       P                Atom = 4
+       Param            Atom = 111
+       Pattern          Atom = 186
+       Ping             Atom = 85
+       Placeholder      Atom = 272
+       Poster           Atom = 156
+       Pre              Atom = 50
+       Preload          Atom = 191
+       Progress         Atom = 200
+       Q                Atom = 5
+       Radiogroup       Atom = 260
+       Readonly         Atom = 210
+       Rel              Atom = 49
+       Required         Atom = 217
+       Reversed         Atom = 218
+       Rows             Atom = 83
+       Rowspan          Atom = 181
+       Rp               Atom = 23
+       Rt               Atom = 24
+       Ruby             Atom = 84
+       S                Atom = 6
+       Samp             Atom = 79
+       Sandbox          Atom = 159
+       Scope            Atom = 105
+       Scoped           Atom = 150
+       Script           Atom = 151
+       Seamless         Atom = 211
+       Section          Atom = 161
+       Select           Atom = 152
+       Selected         Atom = 214
+       Shape            Atom = 107
+       Size             Atom = 80
+       Sizes            Atom = 106
+       Small            Atom = 108
+       Source           Atom = 153
+       Span             Atom = 81
+       Spellcheck       Atom = 265
+       Src              Atom = 46
+       Srcdoc           Atom = 154
+       Srclang          Atom = 178
+       Start            Atom = 109
+       Step             Atom = 82
+       Strong           Atom = 155
+       Style            Atom = 110
+       Sub              Atom = 47
+       Summary          Atom = 180
+       Sup              Atom = 48
+       Tabindex         Atom = 209
+       Table            Atom = 116
+       Target           Atom = 158
+       Tbody            Atom = 115
+       Td               Atom = 26
+       Textarea         Atom = 222
+       Tfoot            Atom = 117
+       Th               Atom = 27
+       Thead            Atom = 119
+       Time             Atom = 87
+       Title            Atom = 118
+       Tr               Atom = 28
+       Track            Atom = 120
+       Translate        Atom = 238
+       Type             Atom = 88
+       Typemustmatch    Atom = 285
+       U                Atom = 7
+       Ul               Atom = 25
+       Usemap           Atom = 157
+       Value            Atom = 113
+       Var              Atom = 52
+       Video            Atom = 114
+       Wbr              Atom = 51
+       Width            Atom = 112
+       Wrap             Atom = 86
 )
 
-const max Atom = 295
+const maxLen = 16
 
-var table = []string{
+var table = [...]string{
        "",
        "a",
-       "abbr",
-       "accept",
-       "accept-charset",
-       "accesskey",
-       "action",
-       "address",
-       "align",
-       "alt",
-       "annotation",
-       "applet",
-       "area",
-       "article",
-       "aside",
-       "async",
-       "audio",
-       "autocomplete",
-       "autofocus",
-       "autoplay",
        "b",
-       "base",
-       "bdi",
-       "bdo",
-       "blockquote",
-       "body",
-       "border",
+       "i",
+       "p",
+       "q",
+       "s",
+       "u",
        "br",
-       "button",
-       "canvas",
-       "caption",
-       "center",
-       "challenge",
-       "charset",
-       "checked",
-       "cite",
-       "class",
-       "code",
-       "col",
-       "colgroup",
-       "color",
-       "cols",
-       "colspan",
-       "command",
-       "content",
-       "contenteditable",
-       "contextmenu",
-       "controls",
-       "coords",
-       "crossorigin",
-       "data",
-       "datalist",
-       "datetime",
+       "em",
        "dd",
-       "default",
-       "defer",
-       "del",
-       "details",
-       "dfn",
-       "dialog",
-       "dir",
-       "dirname",
-       "disabled",
-       "div",
        "dl",
-       "download",
-       "draggable",
-       "dropzone",
        "dt",
-       "em",
-       "embed",
-       "enctype",
-       "fieldset",
-       "figcaption",
-       "figure",
-       "font",
-       "footer",
-       "for",
-       "form",
-       "formaction",
-       "formenctype",
-       "formmethod",
-       "formnovalidate",
-       "formtarget",
-       "frame",
-       "frameset",
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
+       "id",
+       "hr",
+       "ol",
+       "li",
+       "rp",
+       "rt",
+       "ul",
+       "td",
+       "th",
+       "tr",
+       "col",
+       "bdi",
+       "bdo",
+       "alt",
+       "for",
+       "dfn",
+       "del",
+       "dir",
+       "div",
+       "kbd",
+       "ins",
+       "img",
+       "nav",
+       "map",
+       "max",
+       "min",
+       "low",
+       "src",
+       "sub",
+       "sup",
+       "rel",
+       "pre",
+       "wbr",
+       "var",
+       "cite",
+       "code",
+       "cols",
+       "base",
+       "body",
+       "abbr",
+       "area",
+       "font",
+       "form",
+       "data",
+       "kind",
+       "icon",
        "head",
-       "header",
-       "headers",
-       "height",
-       "hgroup",
-       "hidden",
        "high",
-       "hr",
        "href",
-       "hreflang",
        "html",
-       "http-equiv",
-       "i",
-       "icon",
-       "id",
-       "iframe",
-       "img",
-       "inert",
-       "input",
-       "ins",
-       "ismap",
-       "itemid",
-       "itemprop",
-       "itemref",
-       "itemscope",
-       "itemtype",
-       "kbd",
-       "keygen",
-       "keytype",
-       "kind",
-       "label",
+       "open",
+       "name",
+       "nobr",
+       "mark",
+       "menu",
+       "meta",
        "lang",
-       "legend",
-       "li",
        "link",
        "list",
        "loop",
-       "low",
-       "manifest",
-       "map",
-       "mark",
-       "max",
-       "maxlength",
+       "samp",
+       "size",
+       "span",
+       "step",
+       "rows",
+       "ruby",
+       "ping",
+       "wrap",
+       "time",
+       "type",
+       "color",
+       "class",
+       "align",
+       "aside",
+       "async",
+       "audio",
+       "frame",
+       "embed",
+       "defer",
+       "inert",
+       "input",
+       "ismap",
        "media",
-       "mediagroup",
-       "menu",
-       "meta",
        "meter",
-       "method",
-       "min",
-       "multiple",
        "muted",
-       "name",
-       "nav",
-       "nobr",
-       "noscript",
-       "novalidate",
+       "label",
+       "scope",
+       "sizes",
+       "shape",
+       "small",
+       "start",
+       "style",
+       "param",
+       "width",
+       "value",
+       "video",
+       "tbody",
+       "table",
+       "tfoot",
+       "title",
+       "thead",
+       "track",
+       "canvas",
+       "center",
+       "coords",
+       "border",
+       "button",
+       "accept",
+       "action",
+       "applet",
+       "figure",
+       "footer",
+       "dialog",
+       "keygen",
+       "iframe",
+       "itemid",
+       "hgroup",
+       "header",
+       "height",
+       "hidden",
        "object",
-       "ol",
-       "onabort",
-       "onafterprint",
-       "onbeforeprint",
-       "onbeforeunload",
        "onblur",
-       "oncancel",
-       "oncanplay",
-       "oncanplaythrough",
-       "onchange",
-       "onclick",
-       "onclose",
-       "oncontextmenu",
-       "oncuechange",
-       "ondblclick",
        "ondrag",
-       "ondragend",
-       "ondragenter",
-       "ondragleave",
-       "ondragover",
-       "ondragstart",
        "ondrop",
-       "ondurationchange",
-       "onemptied",
-       "onended",
-       "onerror",
-       "onfocus",
-       "onhashchange",
-       "oninput",
-       "oninvalid",
-       "onkeydown",
-       "onkeypress",
-       "onkeyup",
        "onload",
-       "onloadeddata",
-       "onloadedmetadata",
-       "onloadstart",
-       "onmessage",
-       "onmousedown",
-       "onmousemove",
-       "onmouseout",
-       "onmouseover",
-       "onmouseup",
-       "onmousewheel",
-       "onoffline",
-       "ononline",
-       "onpagehide",
-       "onpageshow",
-       "onpause",
-       "onplay",
-       "onplaying",
-       "onpopstate",
-       "onprogress",
-       "onratechange",
-       "onreset",
-       "onresize",
-       "onscroll",
-       "onseeked",
-       "onseeking",
-       "onselect",
        "onshow",
-       "onstalled",
-       "onstorage",
-       "onsubmit",
-       "onsuspend",
-       "ontimeupdate",
-       "onunload",
-       "onvolumechange",
-       "onwaiting",
-       "open",
-       "optgroup",
-       "optimum",
+       "onplay",
        "option",
        "output",
-       "p",
-       "param",
-       "pattern",
-       "ping",
-       "placeholder",
-       "poster",
-       "pre",
-       "preload",
-       "progress",
-       "q",
-       "radiogroup",
-       "readonly",
-       "rel",
-       "required",
-       "reversed",
-       "rows",
-       "rowspan",
-       "rp",
-       "rt",
-       "ruby",
-       "s",
-       "samp",
-       "sandbox",
-       "scope",
+       "method",
+       "legend",
        "scoped",
        "script",
-       "seamless",
-       "section",
        "select",
-       "selected",
-       "shape",
-       "size",
-       "sizes",
-       "small",
        "source",
-       "span",
-       "spellcheck",
-       "src",
        "srcdoc",
-       "srclang",
-       "start",
-       "step",
        "strong",
-       "style",
-       "sub",
+       "poster",
+       "usemap",
+       "target",
+       "sandbox",
+       "caption",
+       "section",
+       "keytype",
+       "charset",
+       "checked",
+       "content",
+       "command",
+       "colspan",
+       "onclick",
+       "onclose",
+       "onabort",
+       "onfocus",
+       "onended",
+       "onerror",
+       "onkeyup",
+       "oninput",
+       "onreset",
+       "onpause",
+       "srclang",
+       "optimum",
        "summary",
-       "sup",
+       "rowspan",
+       "address",
+       "enctype",
+       "article",
+       "itemref",
+       "pattern",
+       "headers",
+       "default",
+       "details",
+       "dirname",
+       "preload",
+       "controls",
+       "colgroup",
+       "noscript",
+       "download",
+       "oncancel",
+       "onchange",
+       "frameset",
+       "hreflang",
+       "progress",
+       "ononline",
+       "dropzone",
+       "onscroll",
+       "onseeked",
+       "onselect",
+       "onsubmit",
+       "onresize",
+       "onunload",
        "tabindex",
-       "table",
-       "target",
-       "tbody",
-       "td",
+       "readonly",
+       "seamless",
+       "fieldset",
+       "manifest",
+       "selected",
+       "multiple",
+       "disabled",
+       "required",
+       "reversed",
+       "datalist",
+       "datetime",
+       "autoplay",
        "textarea",
-       "tfoot",
-       "th",
-       "thead",
-       "time",
-       "title",
-       "tr",
-       "track",
+       "itemprop",
+       "itemtype",
+       "optgroup",
+       "onwaiting",
+       "oncanplay",
+       "onoffline",
+       "onseeking",
+       "accesskey",
+       "onkeydown",
+       "onsuspend",
+       "onstalled",
+       "onstorage",
+       "draggable",
+       "onmessage",
+       "onmouseup",
        "translate",
-       "type",
+       "oninvalid",
+       "itemscope",
+       "onemptied",
+       "challenge",
+       "autofocus",
+       "onplaying",
+       "maxlength",
+       "ondragend",
+       "figcaption",
+       "blockquote",
+       "onpopstate",
+       "onmouseout",
+       "annotation",
+       "ondragover",
+       "onprogress",
+       "http-equiv",
+       "ondblclick",
+       "formaction",
+       "mediagroup",
+       "onpageshow",
+       "onpagehide",
+       "radiogroup",
+       "formmethod",
+       "novalidate",
+       "formtarget",
+       "onkeypress",
+       "spellcheck",
+       "crossorigin",
+       "oncuechange",
+       "ondragstart",
+       "ondragleave",
+       "ondragenter",
+       "onloadstart",
+       "placeholder",
+       "formenctype",
+       "onmousedown",
+       "onmousemove",
+       "onmouseover",
+       "contextmenu",
+       "autocomplete",
+       "onmousewheel",
+       "onafterprint",
+       "ontimeupdate",
+       "onratechange",
+       "onhashchange",
+       "onloadeddata",
        "typemustmatch",
-       "u",
-       "ul",
-       "usemap",
-       "value",
-       "var",
-       "video",
-       "wbr",
-       "width",
-       "wrap",
+       "oncontextmenu",
+       "onbeforeprint",
+       "accept-charset",
+       "formnovalidate",
+       "onvolumechange",
+       "onbeforeunload",
+       "contenteditable",
+       "ondurationchange",
+       "onloadedmetadata",
+       "oncanplaythrough",
+}
+
+var hashes = [...]uint32{
+       0x00000000,
+       0x00000061,
+       0x00000062,
+       0x00000069,
+       0x00000070,
+       0x00000071,
+       0x00000073,
+       0x00000075,
+       0x00000c32,
+       0x00000ccd,
+       0x00000ce4,
+       0x00000cec,
+       0x00000cf4,
+       0x00000d31,
+       0x00000d32,
+       0x00000d33,
+       0x00000d34,
+       0x00000d35,
+       0x00000d36,
+       0x00000d44,
+       0x00000d72,
+       0x00000d8c,
+       0x00000de9,
+       0x00000e30,
+       0x00000e34,
+       0x00000ecc,
+       0x00000ee4,
+       0x00000ee8,
+       0x00000ef2,
+       0x0001818c,
+       0x000184e9,
+       0x000184ef,
+       0x000189f4,
+       0x00019592,
+       0x00019cae,
+       0x00019ccc,
+       0x00019d52,
+       0x00019d56,
+       0x0001a024,
+       0x0001a9b3,
+       0x0001a9c7,
+       0x0001b456,
+       0x0001b850,
+       0x0001b858,
+       0x0001b94e,
+       0x0001bd97,
+       0x0001c223,
+       0x0001c2c2,
+       0x0001c2d0,
+       0x0001c4cc,
+       0x0001ce25,
+       0x0001d032,
+       0x0001d452,
+       0x00302ae5,
+       0x003030e5,
+       0x003031f3,
+       0x00308a05,
+       0x0030b0f9,
+       0x00310432,
+       0x003144c1,
+       0x0032b1b4,
+       0x0032b22d,
+       0x00338ae1,
+       0x003429a4,
+       0x0035018e,
+       0x00359844,
+       0x0035a888,
+       0x0035c4c6,
+       0x0035ddcc,
+       0x00364cce,
+       0x003689c5,
+       0x0036b032,
+       0x00370a2b,
+       0x003719b5,
+       0x00371ae1,
+       0x003789a7,
+       0x0037a9ab,
+       0x0037aa14,
+       0x0037b190,
+       0x003809d0,
+       0x00382b25,
+       0x00384c4e,
+       0x00385cd0,
+       0x0038b293,
+       0x0038d839,
+       0x0039a9a7,
+       0x003a4450,
+       0x003ba9c5,
+       0x003bea65,
+       0x06063d92,
+       0x06078a13,
+       0x0627a88e,
+       0x062828e5,
+       0x062869a3,
+       0x062b1d4f,
+       0x065889c5,
+       0x066704c4,
+       0x067314d2,
+       0x06a69a34,
+       0x06a6ced4,
+       0x06a83850,
+       0x06e31d41,
+       0x06e35cd2,
+       0x06eb5cc4,
+       0x06f104cc,
+       0x07003265,
+       0x070564d3,
+       0x07058a65,
+       0x070709ec,
+       0x070b8a34,
+       0x070be9e5,
+       0x0731444d,
+       0x07451ee8,
+       0x07513ec5,
+       0x07551ccf,
+       0x0770b0f9,
+       0x077105e5,
+       0x0772b194,
+       0x07755de5,
+       0x07759844,
+       0x0778880b,
+       0xc026d453,
+       0xc066dcd2,
+       0xc0c644f3,
+       0xc2c89cd2,
+       0xc36bdd8e,
+       0xc4001a74,
+       0xc40ba98e,
+       0xc539bcd4,
+       0xcaa25a25,
+       0xcac65cd2,
+       0xcea13d87,
+       0xd06d10ce,
+       0xd45889c5,
+       0xd5733944,
+       0xd648b2d0,
+       0xd6611cd2,
+       0xd6651174,
+       0xd6a39cce,
+       0xd8149814,
+       0xd8d0bed2,
+       0xd8d3c447,
+       0xd8d3c590,
+       0xd8d7b044,
+       0xd8d82d97,
+       0xd8d9bc59,
+       0xd93ba98e,
+       0xd96bced4,
+       0xdc6bad84,
+       0xde6219a4,
+       0xe0064cc4,
+       0xe008aa74,
+       0xe0679814,
+       0xe0cb4405,
+       0xe1101d83,
+       0xe178b1a7,
+       0xe6c85cd2,
+       0xed033850,
+       0xee2890d4,
+       0x04d38584,
+       0x053ba996,
+       0x0c0ba992,
+       0x0dabea7f,
+       0x1628c0cc,
+       0x166020dc,
+       0x18db99ac,
+       0x18e709bc,
+       0x18f84c56,
+       0x1a07a810,
+       0x1a07b21e,
+       0x1a20b22f,
+       0x1a5602c8,
+       0x1a669cdf,
+       0x1a68c589,
+       0x1a836acb,
+       0x1aa6cecf,
+       0x1b1340cf,
+       0x1b315a1e,
+       0x220789bb,
+       0x27753ad6,
+       0x2ce70a25,
+       0x59484c52,
+       0x8e789a0b,
+       0x9a0bea7c,
+       0xa37501fd,
+       0xae6744dc,
+       0xc57b9a32,
+       0xcc239a29,
+       0xcc5159ed,
+       0xcd7129ea,
+       0xd51689dc,
+       0xe267b058,
+       0x1b78b2f0,
+       0x1e48b1d3,
+       0x2008a91f,
+       0x28d7b37f,
+       0x402683af,
+       0x40b137e6,
+       0x44e343f8,
+       0x4c578afb,
+       0x5848998f,
+       0x58d7aac6,
+       0x593cb299,
+       0x6008b28f,
+       0x606323a7,
+       0x60679b77,
+       0x6160ba37,
+       0x62682846,
+       0x6cd7b327,
+       0x82a69f60,
+       0x84763670,
+       0x84e79992,
+       0x8cf3c3fe,
+       0x9aa29964,
+       0x9e605f45,
+       0x9f754e90,
+       0xa020bffe,
+       0xa565474d,
+       0xaa68c34d,
+       0xae27a92c,
+       0xae6baafd,
+       0xaec9bf4c,
+       0xb7714778,
+       0xcce9c6c5,
+       0xccebe930,
+       0xee48b1b4,
+       0x04abc5ca,
+       0x04d9d031,
+       0x0a57c5ce,
+       0x0c6445cb,
+       0x0d0842d9,
+       0x0da3dee4,
+       0x2d09f5c8,
+       0x2e27d0a8,
+       0x2ec8e4e9,
+       0x8841626d,
+       0x8d0864ee,
+       0x996876bb,
+       0x9b07fd6d,
+       0x9b51512e,
+       0x9d0058dc,
+       0x9d3bc4ad,
+       0x9ef354dd,
+       0xd8566066,
+       0xde2d45cb,
+       0xde66fcfe,
+       0xe22275cd,
+       0x053703ae,
+       0x11271d85,
+       0x2706077e,
+       0x2d0ebfa7,
+       0x2e27e4e5,
+       0x444bd9ee,
+       0x5845178f,
+       0x5c642eea,
+       0x5e0a2533,
+       0x84070505,
+       0x844574ea,
+       0x8865a00f,
+       0x8868257d,
+       0x984690ea,
+       0x9c67010f,
+       0x9eae264d,
+       0xae243c5f,
+       0xb5351752,
+       0xde0b8b38,
+       0x18973dca,
+       0x81009434,
+       0x88ba2dbc,
+       0x8942ad2d,
+       0x89d77b5a,
+       0x8eba2554,
+       0x970a7ed3,
+       0x9b9e7b14,
+       0xa1d21ceb,
+       0xa1d69cc0,
+       0xa1d7fab7,
+       0xb6f6940c,
+       0x2c6d76e6,
+       0x3b705478,
+       0x950cec0d,
+       0x9b056094,
+       0xb687163c,
+       0xf6845607,
+       0xfa4666f0,
+       0x53ad92bb,
+       0x71f6940a,
+       0x8bbc6cd6,
+       0x1632b560,
+       0x561a2687,
+       0x5a00c22c,
+       0x7c4f1c15,
+       0x0ee8aacc,
+       0x2838bda9,
+       0x6f3c2ece,
+       0xf1d8d91d,
+}
+
+var loHi = [maxLen + 2]uint16{
+       0,
+       1,
+       8,
+       29,
+       53,
+       89,
+       121,
+       159,
+       192,
+       226,
+       247,
+       266,
+       278,
+       285,
+       288,
+       292,
+       293,
+       296,
 }
 
 var oneByteAtoms = [26]Atom{