]> Cypherpunks repositories - gostls13.git/commitdiff
exp/template/html: allow quotes on either side of conditionals and dynamic HTML names
authorMike Samuel <mikesamuel@gmail.com>
Mon, 19 Sep 2011 02:10:15 +0000 (19:10 -0700)
committerMike Samuel <mikesamuel@gmail.com>
Mon, 19 Sep 2011 02:10:15 +0000 (19:10 -0700)
This addresses several use cases:

(1) <h{{.HeaderLevel}}> used to build hierarchical documents.
(2) <input on{{.EventType}}=...> used in widgets.
(3) <div {{" dir=ltr"}}> used to embed bidi-hints.

It also makes sure that we treat the two templates below the same:

<img src={{if .Avatar}}"{{.Avatar}}"{{else}}"anonymous.png"{{end}}>
<img src="{{if .Avatar}}{{.Avatar}}{{else}}anonymous.png{{end}}">

This splits up tTag into a number of sub-states and adds testcases.

R=nigeltao
CC=golang-dev
https://golang.org/cl/5043042

src/pkg/exp/template/html/content.go
src/pkg/exp/template/html/content_test.go
src/pkg/exp/template/html/context.go
src/pkg/exp/template/html/escape.go
src/pkg/exp/template/html/escape_test.go
src/pkg/exp/template/html/html.go
src/pkg/exp/template/html/transition.go

index 4f792004050f1efd7fbff602c6a4035bf6e1ee61..8b9809b982e5883feb8320de825712e2c10b0b1d 100644 (file)
@@ -19,11 +19,15 @@ type (
        CSS string
 
        // HTML encapsulates a known safe HTML document fragment.
-       // Should not be used for HTML from a third-party, or HTML with
+       // It should not be used for HTML from a third-party, or HTML with
        // unclosed tags or comments. The outputs of a sound HTML sanitizer
        // and a template escaped by this package are fine for use with HTML.
        HTML string
 
+       // HTMLAttr encapsulates an HTML attribute from a trusted source,
+       // for example: ` dir="ltr"`.
+       HTMLAttr string
+
        // JS encapsulates a known safe EcmaScript5 Expression, or example,
        // `(x + y * z())`. 
        // Template authors are responsible for ensuring that typed expressions
@@ -56,6 +60,7 @@ const (
        contentTypePlain contentType = iota
        contentTypeCSS
        contentTypeHTML
+       contentTypeHTMLAttr
        contentTypeJS
        contentTypeJSStr
        contentTypeURL
@@ -71,6 +76,8 @@ func stringify(args ...interface{}) (string, contentType) {
                        return string(s), contentTypeCSS
                case HTML:
                        return string(s), contentTypeHTML
+               case HTMLAttr:
+                       return string(s), contentTypeHTMLAttr
                case JS:
                        return string(s), contentTypeJS
                case JSStr:
index caef5ade8e9652a120333307c06040919dea2e88..033dee1747c992d0807c17b8078d83762bea545c 100644 (file)
@@ -16,6 +16,7 @@ func TestTypedContent(t *testing.T) {
                `<b> "foo%" O'Reilly &bar;`,
                CSS(`a[href =~ "//example.com"]#foo`),
                HTML(`Hello, <b>World</b> &amp;tc!`),
+               HTMLAttr(` dir="ltr"`),
                JS(`c && alert("Hello, World!");`),
                JSStr(`Hello, World & O'Reilly\x21`),
                URL(`greeting=H%69&addressee=(World)`),
@@ -38,6 +39,7 @@ func TestTypedContent(t *testing.T) {
                                `ZgotmplZ`,
                                `ZgotmplZ`,
                                `ZgotmplZ`,
+                               `ZgotmplZ`,
                        },
                },
                {
@@ -50,6 +52,7 @@ func TestTypedContent(t *testing.T) {
                                `ZgotmplZ`,
                                `ZgotmplZ`,
                                `ZgotmplZ`,
+                               `ZgotmplZ`,
                        },
                },
                {
@@ -59,11 +62,25 @@ func TestTypedContent(t *testing.T) {
                                `a[href =~ &#34;//example.com&#34;]#foo`,
                                // Not escaped.
                                `Hello, <b>World</b> &amp;tc!`,
+                               ` dir=&#34;ltr&#34;`,
                                `c &amp;&amp; alert(&#34;Hello, World!&#34;);`,
                                `Hello, World &amp; O&#39;Reilly\x21`,
                                `greeting=H%69&amp;addressee=(World)`,
                        },
                },
+               {
+                       `<a{{.}}>`,
+                       []string{
+                               `ZgotmplZ`,
+                               `ZgotmplZ`,
+                               `ZgotmplZ`,
+                               // Allowed and HTML escaped.
+                               ` dir="ltr"`,
+                               `ZgotmplZ`,
+                               `ZgotmplZ`,
+                               `ZgotmplZ`,
+                       },
+               },
                {
                        `<a title={{.}}>`,
                        []string{
@@ -71,6 +88,7 @@ func TestTypedContent(t *testing.T) {
                                `a[href&#32;&#61;~&#32;&#34;//example.com&#34;]#foo`,
                                // Tags stripped, spaces escaped, entity not re-escaped.
                                `Hello,&#32;World&#32;&amp;tc!`,
+                               `&#32;dir&#61;&#34;ltr&#34;`,
                                `c&#32;&amp;&amp;&#32;alert(&#34;Hello,&#32;World!&#34;);`,
                                `Hello,&#32;World&#32;&amp;&#32;O&#39;Reilly\x21`,
                                `greeting&#61;H%69&amp;addressee&#61;(World)`,
@@ -83,6 +101,7 @@ func TestTypedContent(t *testing.T) {
                                `a[href =~ &#34;//example.com&#34;]#foo`,
                                // Tags stripped, entity not re-escaped.
                                `Hello, World &amp;tc!`,
+                               ` dir=&#34;ltr&#34;`,
                                `c &amp;&amp; alert(&#34;Hello, World!&#34;);`,
                                `Hello, World &amp; O&#39;Reilly\x21`,
                                `greeting=H%69&amp;addressee=(World)`,
@@ -95,6 +114,7 @@ func TestTypedContent(t *testing.T) {
                                `a[href =~ &#34;//example.com&#34;]#foo`,
                                // Angle brackets escaped to prevent injection of close tags, entity not re-escaped.
                                `Hello, &lt;b&gt;World&lt;/b&gt; &amp;tc!`,
+                               ` dir=&#34;ltr&#34;`,
                                `c &amp;&amp; alert(&#34;Hello, World!&#34;);`,
                                `Hello, World &amp; O&#39;Reilly\x21`,
                                `greeting=H%69&amp;addressee=(World)`,
@@ -106,6 +126,7 @@ func TestTypedContent(t *testing.T) {
                                `"\u003cb\u003e \"foo%\" O'Reilly &bar;"`,
                                `"a[href =~ \"//example.com\"]#foo"`,
                                `"Hello, \u003cb\u003eWorld\u003c/b\u003e &amp;tc!"`,
+                               `" dir=\"ltr\""`,
                                // Not escaped.
                                `c && alert("Hello, World!");`,
                                // Escape sequence not over-escaped.
@@ -119,6 +140,7 @@ func TestTypedContent(t *testing.T) {
                                `&#34;\u003cb\u003e \&#34;foo%\&#34; O&#39;Reilly &amp;bar;&#34;`,
                                `&#34;a[href =~ \&#34;//example.com\&#34;]#foo&#34;`,
                                `&#34;Hello, \u003cb\u003eWorld\u003c/b\u003e &amp;amp;tc!&#34;`,
+                               `&#34; dir=\&#34;ltr\&#34;&#34;`,
                                // Not JS escaped but HTML escaped.
                                `c &amp;&amp; alert(&#34;Hello, World!&#34;);`,
                                // Escape sequence not over-escaped.
@@ -132,6 +154,7 @@ func TestTypedContent(t *testing.T) {
                                `\x3cb\x3e \x22foo%\x22 O\x27Reilly \x26bar;`,
                                `a[href =~ \x22\/\/example.com\x22]#foo`,
                                `Hello, \x3cb\x3eWorld\x3c\/b\x3e \x26amp;tc!`,
+                               ` dir=\x22ltr\x22`,
                                `c \x26\x26 alert(\x22Hello, World!\x22);`,
                                // Escape sequence not over-escaped.
                                `Hello, World \x26 O\x27Reilly\x21`,
@@ -144,6 +167,7 @@ func TestTypedContent(t *testing.T) {
                                `\x3cb\x3e \x22foo%\x22 O\x27Reilly \x26bar;`,
                                `a[href =~ \x22\/\/example.com\x22]#foo`,
                                `Hello, \x3cb\x3eWorld\x3c\/b\x3e \x26amp;tc!`,
+                               ` dir=\x22ltr\x22`,
                                `c \x26\x26 alert(\x22Hello, World!\x22);`,
                                // Escape sequence not over-escaped.
                                `Hello, World \x26 O\x27Reilly\x21`,
@@ -156,6 +180,7 @@ func TestTypedContent(t *testing.T) {
                                `%3cb%3e%20%22foo%25%22%20O%27Reilly%20%26bar%3b`,
                                `a%5bhref%20%3d~%20%22%2f%2fexample.com%22%5d%23foo`,
                                `Hello%2c%20%3cb%3eWorld%3c%2fb%3e%20%26amp%3btc%21`,
+                               `%20dir%3d%22ltr%22`,
                                `c%20%26%26%20alert%28%22Hello%2c%20World%21%22%29%3b`,
                                `Hello%2c%20World%20%26%20O%27Reilly%5cx21`,
                                // Quotes and parens are escaped but %69 is not over-escaped. HTML escaping is done.
@@ -168,6 +193,7 @@ func TestTypedContent(t *testing.T) {
                                `%3cb%3e%20%22foo%25%22%20O%27Reilly%20%26bar%3b`,
                                `a%5bhref%20%3d~%20%22%2f%2fexample.com%22%5d%23foo`,
                                `Hello%2c%20%3cb%3eWorld%3c%2fb%3e%20%26amp%3btc%21`,
+                               `%20dir%3d%22ltr%22`,
                                `c%20%26%26%20alert%28%22Hello%2c%20World%21%22%29%3b`,
                                `Hello%2c%20World%20%26%20O%27Reilly%5cx21`,
                                // Quotes and parens are escaped but %69 is not over-escaped. HTML escaping is not done.
index e8812cf86579c07b67f2f2a8a7ad8a0d700dbd56..f7802d04b3155ccf72a094ce561997aa6d4d6f82 100644 (file)
@@ -20,6 +20,7 @@ type context struct {
        delim   delim
        urlPart urlPart
        jsCtx   jsCtx
+       attr    attr
        element element
        err     *Error
 }
@@ -30,6 +31,7 @@ func (c context) eq(d context) bool {
                c.delim == d.delim &&
                c.urlPart == d.urlPart &&
                c.jsCtx == d.jsCtx &&
+               c.attr == d.attr &&
                c.element == d.element &&
                c.err == d.err
 }
@@ -51,6 +53,9 @@ func (c context) mangle(templateName string) string {
        if c.jsCtx != 0 {
                s += "_" + c.jsCtx.String()
        }
+       if c.attr != 0 {
+               s += "_" + c.attr.String()
+       }
        if c.element != 0 {
                s += "_" + c.element.String()
        }
@@ -75,6 +80,15 @@ const (
        stateText state = iota
        // stateTag occurs before an HTML attribute or the end of a tag.
        stateTag
+       // stateAttrName occurs inside an attribute name.
+       // It occurs between the ^'s in ` ^name^ = value`.
+       stateAttrName
+       // stateAfterName occurs after an attr name has ended but before any
+       // equals sign. It occurs between the ^'s in ` name^ ^= value`.
+       stateAfterName
+       // stateBeforeValue occurs after the equals sign but before the value.
+       // It occurs between the ^'s in ` name =^ ^value`.
+       stateBeforeValue
        // stateComment occurs inside an <!-- HTML comment -->.
        stateComment
        // stateRCDATA occurs inside an RCDATA element (<textarea> or <title>)
@@ -120,6 +134,9 @@ const (
 var stateNames = [...]string{
        stateText:        "stateText",
        stateTag:         "stateTag",
+       stateAttrName:    "stateAttrName",
+       stateAfterName:   "stateAfterName",
+       stateBeforeValue: "stateBeforeValue",
        stateComment:     "stateComment",
        stateRCDATA:      "stateRCDATA",
        stateAttr:        "stateAttr",
@@ -145,7 +162,7 @@ func (s state) String() string {
        if int(s) < len(stateNames) {
                return stateNames[s]
        }
-       return fmt.Sprintf("illegal state %d", s)
+       return fmt.Sprintf("illegal state %d", int(s))
 }
 
 // delim is the delimiter that will end the current HTML attribute.
@@ -174,7 +191,7 @@ func (d delim) String() string {
        if int(d) < len(delimNames) {
                return delimNames[d]
        }
-       return fmt.Sprintf("illegal delim %d", d)
+       return fmt.Sprintf("illegal delim %d", int(d))
 }
 
 // urlPart identifies a part in an RFC 3986 hierarchical URL to allow different
@@ -207,7 +224,7 @@ func (u urlPart) String() string {
        if int(u) < len(urlPartNames) {
                return urlPartNames[u]
        }
-       return fmt.Sprintf("illegal urlPart %d", u)
+       return fmt.Sprintf("illegal urlPart %d", int(u))
 }
 
 // jsCtx determines whether a '/' starts a regular expression literal or a
@@ -232,7 +249,7 @@ func (c jsCtx) String() string {
        case jsCtxUnknown:
                return "jsCtxUnknown"
        }
-       return fmt.Sprintf("illegal jsCtx %d", c)
+       return fmt.Sprintf("illegal jsCtx %d", int(c))
 }
 
 // element identifies the HTML element when inside a start tag or special body.
@@ -267,5 +284,33 @@ func (e element) String() string {
        if int(e) < len(elementNames) {
                return elementNames[e]
        }
-       return fmt.Sprintf("illegal element %d", e)
+       return fmt.Sprintf("illegal element %d", int(e))
+}
+
+// attr identifies the most recent HTML attribute when inside a start tag.
+type attr uint8
+
+const (
+       // attrNone corresponds to a normal attribute or no attribute.
+       attrNone attr = iota
+       // attrScript corresponds to an event handler attribute.
+       attrScript
+       // attrStyle corresponds to the style attribute whose value is CSS.
+       attrStyle
+       // attrURL corresponds to an attribute whose value is a URL.
+       attrURL
+)
+
+var attrNames = [...]string{
+       attrNone:   "attrNone",
+       attrScript: "attrScript",
+       attrStyle:  "attrStyle",
+       attrURL:    "attrURL",
+}
+
+func (a attr) String() string {
+       if int(a) < len(attrNames) {
+               return attrNames[a]
+       }
+       return fmt.Sprintf("illegal attr %d", int(a))
 }
index f629930df70eaa7a0d80610401d2c3d5d6ddbc1e..a8f3dfc17ddec65c8fe89acd4d5f440dabc0596a 100644 (file)
@@ -66,6 +66,7 @@ var funcMap = template.FuncMap{
        "exp_template_html_attrescaper":     attrEscaper,
        "exp_template_html_cssescaper":      cssEscaper,
        "exp_template_html_cssvaluefilter":  cssValueFilter,
+       "exp_template_html_htmlnamefilter":  htmlNameFilter,
        "exp_template_html_htmlescaper":     htmlEscaper,
        "exp_template_html_jsregexpescaper": jsRegexpEscaper,
        "exp_template_html_jsstrescaper":    jsStrEscaper,
@@ -151,8 +152,11 @@ func (e *escaper) escape(c context, n parse.Node) context {
 
 // escapeAction escapes an action template node.
 func (e *escaper) escapeAction(c context, n *parse.ActionNode) context {
+       c = nudge(c)
        s := make([]string, 0, 3)
        switch c.state {
+       case stateError:
+               return c
        case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL:
                switch c.urlPart {
                case urlPartNone:
@@ -194,6 +198,13 @@ func (e *escaper) escapeAction(c context, n *parse.ActionNode) context {
                s = append(s, "exp_template_html_htmlescaper")
        case stateRCDATA:
                s = append(s, "exp_template_html_rcdataescaper")
+       case stateAttr:
+               // Handled below in delim check.
+       case stateAttrName, stateTag:
+               c.state = stateAttrName
+               s = append(s, "exp_template_html_htmlnamefilter")
+       default:
+               panic("unexpected state " + c.state.String())
        }
        switch c.delim {
        case delimNone:
@@ -289,6 +300,33 @@ func newIdentCmd(identifier string) *parse.CommandNode {
        }
 }
 
+// nudge returns the context that would result from following empty string
+// transitions from the input context.
+// For example, parsing:
+//     `<a href=`
+// will end in context{stateBeforeValue, attrURL}, but parsing one extra rune:
+//     `<a href=x`
+// will end in context{stateURL, delimSpaceOrTagEnd, ...}.
+// There are two transitions that happen when the 'x' is seen:
+// (1) Transition from a before-value state to a start-of-value state without
+//     consuming any character.
+// (2) Consume 'x' and transition past the first value character.
+// In this case, nudging produces the context after (1) happens.
+func nudge(c context) context {
+       switch c.state {
+       case stateTag:
+               // In `<foo {{.}}`, the action should emit an attribute.
+               c.state = stateAttrName
+       case stateBeforeValue:
+               // In `<foo bar={{.}}`, the action is an undelimited value.
+               c.state, c.delim, c.attr = attrStartStates[c.attr], delimSpaceOrTagEnd, attrNone
+       case stateAfterName:
+               // In `<foo bar {{.}}`, the action is an attribute name.
+               c.state, c.attr = stateAttrName, attrNone
+       }
+       return c
+}
+
 // join joins the two contexts of a branch template node. The result is an
 // error context if either of the input contexts are error contexts, or if the
 // the input contexts differ.
@@ -319,6 +357,17 @@ func join(a, b context, line int, nodeName string) context {
                return c
        }
 
+       // Allow a nudged context to join with an unnudged one.
+       // This means that
+       //   <p title={{if .C}}{{.}}{{end}}
+       // ends in an unquoted value state even though the else branch
+       // ends in stateBeforeValue.
+       if c, d := nudge(a), nudge(b); !(c.eq(a) && d.eq(b)) {
+               if e := join(c, d, line, nodeName); e.state != stateError {
+                       return e
+               }
+       }
+
        return context{
                state: stateError,
                err:   errorf(ErrBranchEnd, line, "{{%s}} branches end in different contexts: %v, %v", nodeName, a, b),
index b3bd94d66b9fa2b478a8d53abc63c044a110caca..5202aa34a13fa915799c955a43e8a64af8da04be 100644 (file)
@@ -411,6 +411,51 @@ func TestEscape(t *testing.T) {
                        `<textarea><{{"/textarea "}}...</textarea>`,
                        `<textarea>&lt;/textarea ...</textarea>`,
                },
+               {
+                       "optional attrs",
+                       `<img class="{{"iconClass"}}"` +
+                               `{{if .T}} id="{{"<iconId>"}}"{{end}}` +
+                               // Double quotes inside if/else.
+                               ` src=` +
+                               `{{if .T}}"?{{"<iconPath>"}}"` +
+                               `{{else}}"images/cleardot.gif"{{end}}` +
+                               // Missing space before title, but it is not a
+                               // part of the src attribute.
+                               `{{if .T}}title="{{"<title>"}}"{{end}}` +
+                               // Quotes outside if/else.
+                               ` alt="` +
+                               `{{if .T}}{{"<alt>"}}` +
+                               `{{else}}{{if .F}}{{"<title>"}}{{end}}` +
+                               `{{end}}"` +
+                               `>`,
+                       `<img class="iconClass" id="&lt;iconId&gt;" src="?%3ciconPath%3e"title="&lt;title&gt;" alt="&lt;alt&gt;">`,
+               },
+               {
+                       "conditional valueless attr name",
+                       `<input{{if .T}} checked{{end}} name=n>`,
+                       `<input checked name=n>`,
+               },
+               {
+                       "conditional dynamic valueless attr name 1",
+                       `<input{{if .T}} {{"checked"}}{{end}} name=n>`,
+                       `<input checked name=n>`,
+               },
+               {
+                       "conditional dynamic valueless attr name 2",
+                       `<input {{if .T}}{{"checked"}} {{end}}name=n>`,
+                       `<input checked name=n>`,
+               },
+               {
+                       "dynamic attribute name",
+                       `<img on{{"load"}}="alert({{"loaded"}})">`,
+                       // Treated as JS since quotes are inserted.
+                       `<img onload="alert(&#34;loaded&#34;)">`,
+               },
+               {
+                       "dynamic element name",
+                       `<h{{3}}><table><t{{"head"}}>...</h{{3}}>`,
+                       `<h3><table><thead>...</h3>`,
+               },
        }
 
        for _, test := range tests {
@@ -780,9 +825,25 @@ func TestEscapeText(t *testing.T) {
                        `<a>`,
                        context{state: stateText},
                },
+               {
+                       `<a href`,
+                       context{state: stateAttrName, attr: attrURL},
+               },
+               {
+                       `<a on`,
+                       context{state: stateAttrName, attr: attrScript},
+               },
+               {
+                       `<a href `,
+                       context{state: stateAfterName, attr: attrURL},
+               },
+               {
+                       `<a style  =  `,
+                       context{state: stateBeforeValue, attr: attrStyle},
+               },
                {
                        `<a href=`,
-                       context{state: stateURL, delim: delimSpaceOrTagEnd},
+                       context{state: stateBeforeValue, attr: attrURL},
                },
                {
                        `<a href=x`,
index 8805e7ad3d97af6207d48876f30f9b3c20599f9d..52472d193e1f15b4e773559b10eff2cf824f3680 100644 (file)
@@ -205,3 +205,22 @@ func stripTags(html string) string {
        }
        return b.String()
 }
+
+// htmlNameFilter accepts valid parts of an HTML attribute or tag name or
+// a known-safe HTML attribute.
+func htmlNameFilter(args ...interface{}) string {
+       s, t := stringify(args...)
+       if t == contentTypeHTMLAttr {
+               return s
+       }
+       for _, r := range s {
+               switch {
+               case '0' <= r && r <= '9':
+               case 'A' <= r && r <= 'Z':
+               case 'a' <= r && r <= 'z':
+               default:
+                       return filterFailsafe
+               }
+       }
+       return s
+}
index 450dda43c423136557c5b513e45feaabe024e996..6b10561caa4856373abd6e2d170014238099dcfa 100644 (file)
@@ -18,6 +18,9 @@ import (
 var transitionFunc = [...]func(context, []byte) (context, []byte){
        stateText:        tText,
        stateTag:         tTag,
+       stateAttrName:    tAttrName,
+       stateAfterName:   tAfterName,
+       stateBeforeValue: tBeforeValue,
        stateComment:     tComment,
        stateRCDATA:      tSpecialTagEnd,
        stateAttr:        tAttr,
@@ -79,54 +82,90 @@ var elementContentType = [...]state{
 // tTag is the context transition function for the tag state.
 func tTag(c context, s []byte) (context, []byte) {
        // Find the attribute name.
-       attrStart := eatWhiteSpace(s, 0)
-       i, err := eatAttrName(s, attrStart)
-       if err != nil {
-               return context{
-                       state: stateError,
-                       err:   err,
-               }, nil
-       }
+       i := eatWhiteSpace(s, 0)
        if i == len(s) {
                return c, nil
        }
-       state := stateAttr
-       canonAttrName := strings.ToLower(string(s[attrStart:i]))
-       if urlAttr[canonAttrName] {
-               state = stateURL
-       } else if strings.HasPrefix(canonAttrName, "on") {
-               state = stateJS
-       } else if canonAttrName == "style" {
-               state = stateCSS
+       if s[i] == '>' {
+               return context{
+                       state:   elementContentType[c.element],
+                       element: c.element,
+               }, s[i+1:]
+       }
+       j, err := eatAttrName(s, i)
+       if err != nil {
+               return context{state: stateError, err: err}, nil
+       }
+       state, attr := stateTag, attrNone
+       if i != j {
+               canonAttrName := strings.ToLower(string(s[i:j]))
+               if urlAttr[canonAttrName] {
+                       attr = attrURL
+               } else if strings.HasPrefix(canonAttrName, "on") {
+                       attr = attrScript
+               } else if canonAttrName == "style" {
+                       attr = attrStyle
+               }
+               if j == len(s) {
+                       state = stateAttrName
+               } else {
+                       state = stateAfterName
+               }
+       }
+       return context{state: state, element: c.element, attr: attr}, s[j:]
+}
+
+// tAttrName is the context transition function for stateAttrName.
+func tAttrName(c context, s []byte) (context, []byte) {
+       i, err := eatAttrName(s, 0)
+       if err != nil {
+               return context{state: stateError, err: err}, nil
+       } else if i == len(s) {
+               return c, nil
        }
+       c.state = stateAfterName
+       return c, s[i:]
+}
 
+// tAfterName is the context transition function for stateAfterName.
+func tAfterName(c context, s []byte) (context, []byte) {
        // Look for the start of the value.
-       i = eatWhiteSpace(s, i)
+       i := eatWhiteSpace(s, 0)
        if i == len(s) {
-               return c, s[i:]
-       }
-       if s[i] == '>' {
-               state = elementContentType[c.element]
-               return context{state: state, element: c.element}, s[i+1:]
+               return c, nil
        } else if s[i] != '=' {
-               // Possible due to a valueless attribute or '/' in "<input />".
+               // Occurs due to tag ending '>', and valueless attribute.
+               c.state = stateTag
                return c, s[i:]
        }
+       c.state = stateBeforeValue
        // Consume the "=".
-       i = eatWhiteSpace(s, i+1)
+       return c, s[i+1:]
+}
 
+var attrStartStates = [...]state{
+       attrNone:   stateAttr,
+       attrScript: stateJS,
+       attrStyle:  stateCSS,
+       attrURL:    stateURL,
+}
+
+// tBeforeValue is the context transition function for stateBeforeValue.
+func tBeforeValue(c context, s []byte) (context, []byte) {
+       i := eatWhiteSpace(s, 0)
+       if i == len(s) {
+               return c, nil
+       }
        // Find the attribute delimiter.
        delim := delimSpaceOrTagEnd
-       if i < len(s) {
-               switch s[i] {
-               case '\'':
-                       delim, i = delimSingleQuote, i+1
-               case '"':
-                       delim, i = delimDoubleQuote, i+1
-               }
+       switch s[i] {
+       case '\'':
+               delim, i = delimSingleQuote, i+1
+       case '"':
+               delim, i = delimDoubleQuote, i+1
        }
-
-       return context{state: state, delim: delim, element: c.element}, s[i:]
+       c.state, c.delim, c.attr = attrStartStates[c.attr], delim, attrNone
+       return c, s[i:]
 }
 
 // tComment is the context transition function for stateComment.