CSS string
// HTML encapsulates a known safe HTML document fragment.
- // Should not be used for HTML from a third-party, or HTML with
+ // It should not be used for HTML from a third-party, or HTML with
// unclosed tags or comments. The outputs of a sound HTML sanitizer
// and a template escaped by this package are fine for use with HTML.
HTML string
+ // HTMLAttr encapsulates an HTML attribute from a trusted source,
+ // for example: ` dir="ltr"`.
+ HTMLAttr string
+
// JS encapsulates a known safe EcmaScript5 Expression, or example,
// `(x + y * z())`.
// Template authors are responsible for ensuring that typed expressions
contentTypePlain contentType = iota
contentTypeCSS
contentTypeHTML
+ contentTypeHTMLAttr
contentTypeJS
contentTypeJSStr
contentTypeURL
return string(s), contentTypeCSS
case HTML:
return string(s), contentTypeHTML
+ case HTMLAttr:
+ return string(s), contentTypeHTMLAttr
case JS:
return string(s), contentTypeJS
case JSStr:
`<b> "foo%" O'Reilly &bar;`,
CSS(`a[href =~ "//example.com"]#foo`),
HTML(`Hello, <b>World</b> &tc!`),
+ HTMLAttr(` dir="ltr"`),
JS(`c && alert("Hello, World!");`),
JSStr(`Hello, World & O'Reilly\x21`),
URL(`greeting=H%69&addressee=(World)`),
`ZgotmplZ`,
`ZgotmplZ`,
`ZgotmplZ`,
+ `ZgotmplZ`,
},
},
{
`ZgotmplZ`,
`ZgotmplZ`,
`ZgotmplZ`,
+ `ZgotmplZ`,
},
},
{
`a[href =~ "//example.com"]#foo`,
// Not escaped.
`Hello, <b>World</b> &tc!`,
+ ` dir="ltr"`,
`c && alert("Hello, World!");`,
`Hello, World & O'Reilly\x21`,
`greeting=H%69&addressee=(World)`,
},
},
+ {
+ `<a{{.}}>`,
+ []string{
+ `ZgotmplZ`,
+ `ZgotmplZ`,
+ `ZgotmplZ`,
+ // Allowed and HTML escaped.
+ ` dir="ltr"`,
+ `ZgotmplZ`,
+ `ZgotmplZ`,
+ `ZgotmplZ`,
+ },
+ },
{
`<a title={{.}}>`,
[]string{
`a[href =~ "//example.com"]#foo`,
// Tags stripped, spaces escaped, entity not re-escaped.
`Hello, World &tc!`,
+ ` dir="ltr"`,
`c && alert("Hello, World!");`,
`Hello, World & O'Reilly\x21`,
`greeting=H%69&addressee=(World)`,
`a[href =~ "//example.com"]#foo`,
// Tags stripped, entity not re-escaped.
`Hello, World &tc!`,
+ ` dir="ltr"`,
`c && alert("Hello, World!");`,
`Hello, World & O'Reilly\x21`,
`greeting=H%69&addressee=(World)`,
`a[href =~ "//example.com"]#foo`,
// Angle brackets escaped to prevent injection of close tags, entity not re-escaped.
`Hello, <b>World</b> &tc!`,
+ ` dir="ltr"`,
`c && alert("Hello, World!");`,
`Hello, World & O'Reilly\x21`,
`greeting=H%69&addressee=(World)`,
`"\u003cb\u003e \"foo%\" O'Reilly &bar;"`,
`"a[href =~ \"//example.com\"]#foo"`,
`"Hello, \u003cb\u003eWorld\u003c/b\u003e &tc!"`,
+ `" dir=\"ltr\""`,
// Not escaped.
`c && alert("Hello, World!");`,
// Escape sequence not over-escaped.
`"\u003cb\u003e \"foo%\" O'Reilly &bar;"`,
`"a[href =~ \"//example.com\"]#foo"`,
`"Hello, \u003cb\u003eWorld\u003c/b\u003e &amp;tc!"`,
+ `" dir=\"ltr\""`,
// Not JS escaped but HTML escaped.
`c && alert("Hello, World!");`,
// Escape sequence not over-escaped.
`\x3cb\x3e \x22foo%\x22 O\x27Reilly \x26bar;`,
`a[href =~ \x22\/\/example.com\x22]#foo`,
`Hello, \x3cb\x3eWorld\x3c\/b\x3e \x26amp;tc!`,
+ ` dir=\x22ltr\x22`,
`c \x26\x26 alert(\x22Hello, World!\x22);`,
// Escape sequence not over-escaped.
`Hello, World \x26 O\x27Reilly\x21`,
`\x3cb\x3e \x22foo%\x22 O\x27Reilly \x26bar;`,
`a[href =~ \x22\/\/example.com\x22]#foo`,
`Hello, \x3cb\x3eWorld\x3c\/b\x3e \x26amp;tc!`,
+ ` dir=\x22ltr\x22`,
`c \x26\x26 alert(\x22Hello, World!\x22);`,
// Escape sequence not over-escaped.
`Hello, World \x26 O\x27Reilly\x21`,
`%3cb%3e%20%22foo%25%22%20O%27Reilly%20%26bar%3b`,
`a%5bhref%20%3d~%20%22%2f%2fexample.com%22%5d%23foo`,
`Hello%2c%20%3cb%3eWorld%3c%2fb%3e%20%26amp%3btc%21`,
+ `%20dir%3d%22ltr%22`,
`c%20%26%26%20alert%28%22Hello%2c%20World%21%22%29%3b`,
`Hello%2c%20World%20%26%20O%27Reilly%5cx21`,
// Quotes and parens are escaped but %69 is not over-escaped. HTML escaping is done.
`%3cb%3e%20%22foo%25%22%20O%27Reilly%20%26bar%3b`,
`a%5bhref%20%3d~%20%22%2f%2fexample.com%22%5d%23foo`,
`Hello%2c%20%3cb%3eWorld%3c%2fb%3e%20%26amp%3btc%21`,
+ `%20dir%3d%22ltr%22`,
`c%20%26%26%20alert%28%22Hello%2c%20World%21%22%29%3b`,
`Hello%2c%20World%20%26%20O%27Reilly%5cx21`,
// Quotes and parens are escaped but %69 is not over-escaped. HTML escaping is not done.
delim delim
urlPart urlPart
jsCtx jsCtx
+ attr attr
element element
err *Error
}
c.delim == d.delim &&
c.urlPart == d.urlPart &&
c.jsCtx == d.jsCtx &&
+ c.attr == d.attr &&
c.element == d.element &&
c.err == d.err
}
if c.jsCtx != 0 {
s += "_" + c.jsCtx.String()
}
+ if c.attr != 0 {
+ s += "_" + c.attr.String()
+ }
if c.element != 0 {
s += "_" + c.element.String()
}
stateText state = iota
// stateTag occurs before an HTML attribute or the end of a tag.
stateTag
+ // stateAttrName occurs inside an attribute name.
+ // It occurs between the ^'s in ` ^name^ = value`.
+ stateAttrName
+ // stateAfterName occurs after an attr name has ended but before any
+ // equals sign. It occurs between the ^'s in ` name^ ^= value`.
+ stateAfterName
+ // stateBeforeValue occurs after the equals sign but before the value.
+ // It occurs between the ^'s in ` name =^ ^value`.
+ stateBeforeValue
// stateComment occurs inside an <!-- HTML comment -->.
stateComment
// stateRCDATA occurs inside an RCDATA element (<textarea> or <title>)
var stateNames = [...]string{
stateText: "stateText",
stateTag: "stateTag",
+ stateAttrName: "stateAttrName",
+ stateAfterName: "stateAfterName",
+ stateBeforeValue: "stateBeforeValue",
stateComment: "stateComment",
stateRCDATA: "stateRCDATA",
stateAttr: "stateAttr",
if int(s) < len(stateNames) {
return stateNames[s]
}
- return fmt.Sprintf("illegal state %d", s)
+ return fmt.Sprintf("illegal state %d", int(s))
}
// delim is the delimiter that will end the current HTML attribute.
if int(d) < len(delimNames) {
return delimNames[d]
}
- return fmt.Sprintf("illegal delim %d", d)
+ return fmt.Sprintf("illegal delim %d", int(d))
}
// urlPart identifies a part in an RFC 3986 hierarchical URL to allow different
if int(u) < len(urlPartNames) {
return urlPartNames[u]
}
- return fmt.Sprintf("illegal urlPart %d", u)
+ return fmt.Sprintf("illegal urlPart %d", int(u))
}
// jsCtx determines whether a '/' starts a regular expression literal or a
case jsCtxUnknown:
return "jsCtxUnknown"
}
- return fmt.Sprintf("illegal jsCtx %d", c)
+ return fmt.Sprintf("illegal jsCtx %d", int(c))
}
// element identifies the HTML element when inside a start tag or special body.
if int(e) < len(elementNames) {
return elementNames[e]
}
- return fmt.Sprintf("illegal element %d", e)
+ return fmt.Sprintf("illegal element %d", int(e))
+}
+
+// attr identifies the most recent HTML attribute when inside a start tag.
+type attr uint8
+
+const (
+ // attrNone corresponds to a normal attribute or no attribute.
+ attrNone attr = iota
+ // attrScript corresponds to an event handler attribute.
+ attrScript
+ // attrStyle corresponds to the style attribute whose value is CSS.
+ attrStyle
+ // attrURL corresponds to an attribute whose value is a URL.
+ attrURL
+)
+
+var attrNames = [...]string{
+ attrNone: "attrNone",
+ attrScript: "attrScript",
+ attrStyle: "attrStyle",
+ attrURL: "attrURL",
+}
+
+func (a attr) String() string {
+ if int(a) < len(attrNames) {
+ return attrNames[a]
+ }
+ return fmt.Sprintf("illegal attr %d", int(a))
}
"exp_template_html_attrescaper": attrEscaper,
"exp_template_html_cssescaper": cssEscaper,
"exp_template_html_cssvaluefilter": cssValueFilter,
+ "exp_template_html_htmlnamefilter": htmlNameFilter,
"exp_template_html_htmlescaper": htmlEscaper,
"exp_template_html_jsregexpescaper": jsRegexpEscaper,
"exp_template_html_jsstrescaper": jsStrEscaper,
// escapeAction escapes an action template node.
func (e *escaper) escapeAction(c context, n *parse.ActionNode) context {
+ c = nudge(c)
s := make([]string, 0, 3)
switch c.state {
+ case stateError:
+ return c
case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL:
switch c.urlPart {
case urlPartNone:
s = append(s, "exp_template_html_htmlescaper")
case stateRCDATA:
s = append(s, "exp_template_html_rcdataescaper")
+ case stateAttr:
+ // Handled below in delim check.
+ case stateAttrName, stateTag:
+ c.state = stateAttrName
+ s = append(s, "exp_template_html_htmlnamefilter")
+ default:
+ panic("unexpected state " + c.state.String())
}
switch c.delim {
case delimNone:
}
}
+// nudge returns the context that would result from following empty string
+// transitions from the input context.
+// For example, parsing:
+// `<a href=`
+// will end in context{stateBeforeValue, attrURL}, but parsing one extra rune:
+// `<a href=x`
+// will end in context{stateURL, delimSpaceOrTagEnd, ...}.
+// There are two transitions that happen when the 'x' is seen:
+// (1) Transition from a before-value state to a start-of-value state without
+// consuming any character.
+// (2) Consume 'x' and transition past the first value character.
+// In this case, nudging produces the context after (1) happens.
+func nudge(c context) context {
+ switch c.state {
+ case stateTag:
+ // In `<foo {{.}}`, the action should emit an attribute.
+ c.state = stateAttrName
+ case stateBeforeValue:
+ // In `<foo bar={{.}}`, the action is an undelimited value.
+ c.state, c.delim, c.attr = attrStartStates[c.attr], delimSpaceOrTagEnd, attrNone
+ case stateAfterName:
+ // In `<foo bar {{.}}`, the action is an attribute name.
+ c.state, c.attr = stateAttrName, attrNone
+ }
+ return c
+}
+
// join joins the two contexts of a branch template node. The result is an
// error context if either of the input contexts are error contexts, or if the
// the input contexts differ.
return c
}
+ // Allow a nudged context to join with an unnudged one.
+ // This means that
+ // <p title={{if .C}}{{.}}{{end}}
+ // ends in an unquoted value state even though the else branch
+ // ends in stateBeforeValue.
+ if c, d := nudge(a), nudge(b); !(c.eq(a) && d.eq(b)) {
+ if e := join(c, d, line, nodeName); e.state != stateError {
+ return e
+ }
+ }
+
return context{
state: stateError,
err: errorf(ErrBranchEnd, line, "{{%s}} branches end in different contexts: %v, %v", nodeName, a, b),
`<textarea><{{"/textarea "}}...</textarea>`,
`<textarea></textarea ...</textarea>`,
},
+ {
+ "optional attrs",
+ `<img class="{{"iconClass"}}"` +
+ `{{if .T}} id="{{"<iconId>"}}"{{end}}` +
+ // Double quotes inside if/else.
+ ` src=` +
+ `{{if .T}}"?{{"<iconPath>"}}"` +
+ `{{else}}"images/cleardot.gif"{{end}}` +
+ // Missing space before title, but it is not a
+ // part of the src attribute.
+ `{{if .T}}title="{{"<title>"}}"{{end}}` +
+ // Quotes outside if/else.
+ ` alt="` +
+ `{{if .T}}{{"<alt>"}}` +
+ `{{else}}{{if .F}}{{"<title>"}}{{end}}` +
+ `{{end}}"` +
+ `>`,
+ `<img class="iconClass" id="<iconId>" src="?%3ciconPath%3e"title="<title>" alt="<alt>">`,
+ },
+ {
+ "conditional valueless attr name",
+ `<input{{if .T}} checked{{end}} name=n>`,
+ `<input checked name=n>`,
+ },
+ {
+ "conditional dynamic valueless attr name 1",
+ `<input{{if .T}} {{"checked"}}{{end}} name=n>`,
+ `<input checked name=n>`,
+ },
+ {
+ "conditional dynamic valueless attr name 2",
+ `<input {{if .T}}{{"checked"}} {{end}}name=n>`,
+ `<input checked name=n>`,
+ },
+ {
+ "dynamic attribute name",
+ `<img on{{"load"}}="alert({{"loaded"}})">`,
+ // Treated as JS since quotes are inserted.
+ `<img onload="alert("loaded")">`,
+ },
+ {
+ "dynamic element name",
+ `<h{{3}}><table><t{{"head"}}>...</h{{3}}>`,
+ `<h3><table><thead>...</h3>`,
+ },
}
for _, test := range tests {
`<a>`,
context{state: stateText},
},
+ {
+ `<a href`,
+ context{state: stateAttrName, attr: attrURL},
+ },
+ {
+ `<a on`,
+ context{state: stateAttrName, attr: attrScript},
+ },
+ {
+ `<a href `,
+ context{state: stateAfterName, attr: attrURL},
+ },
+ {
+ `<a style = `,
+ context{state: stateBeforeValue, attr: attrStyle},
+ },
{
`<a href=`,
- context{state: stateURL, delim: delimSpaceOrTagEnd},
+ context{state: stateBeforeValue, attr: attrURL},
},
{
`<a href=x`,
}
return b.String()
}
+
+// htmlNameFilter accepts valid parts of an HTML attribute or tag name or
+// a known-safe HTML attribute.
+func htmlNameFilter(args ...interface{}) string {
+ s, t := stringify(args...)
+ if t == contentTypeHTMLAttr {
+ return s
+ }
+ for _, r := range s {
+ switch {
+ case '0' <= r && r <= '9':
+ case 'A' <= r && r <= 'Z':
+ case 'a' <= r && r <= 'z':
+ default:
+ return filterFailsafe
+ }
+ }
+ return s
+}
var transitionFunc = [...]func(context, []byte) (context, []byte){
stateText: tText,
stateTag: tTag,
+ stateAttrName: tAttrName,
+ stateAfterName: tAfterName,
+ stateBeforeValue: tBeforeValue,
stateComment: tComment,
stateRCDATA: tSpecialTagEnd,
stateAttr: tAttr,
// tTag is the context transition function for the tag state.
func tTag(c context, s []byte) (context, []byte) {
// Find the attribute name.
- attrStart := eatWhiteSpace(s, 0)
- i, err := eatAttrName(s, attrStart)
- if err != nil {
- return context{
- state: stateError,
- err: err,
- }, nil
- }
+ i := eatWhiteSpace(s, 0)
if i == len(s) {
return c, nil
}
- state := stateAttr
- canonAttrName := strings.ToLower(string(s[attrStart:i]))
- if urlAttr[canonAttrName] {
- state = stateURL
- } else if strings.HasPrefix(canonAttrName, "on") {
- state = stateJS
- } else if canonAttrName == "style" {
- state = stateCSS
+ if s[i] == '>' {
+ return context{
+ state: elementContentType[c.element],
+ element: c.element,
+ }, s[i+1:]
+ }
+ j, err := eatAttrName(s, i)
+ if err != nil {
+ return context{state: stateError, err: err}, nil
+ }
+ state, attr := stateTag, attrNone
+ if i != j {
+ canonAttrName := strings.ToLower(string(s[i:j]))
+ if urlAttr[canonAttrName] {
+ attr = attrURL
+ } else if strings.HasPrefix(canonAttrName, "on") {
+ attr = attrScript
+ } else if canonAttrName == "style" {
+ attr = attrStyle
+ }
+ if j == len(s) {
+ state = stateAttrName
+ } else {
+ state = stateAfterName
+ }
+ }
+ return context{state: state, element: c.element, attr: attr}, s[j:]
+}
+
+// tAttrName is the context transition function for stateAttrName.
+func tAttrName(c context, s []byte) (context, []byte) {
+ i, err := eatAttrName(s, 0)
+ if err != nil {
+ return context{state: stateError, err: err}, nil
+ } else if i == len(s) {
+ return c, nil
}
+ c.state = stateAfterName
+ return c, s[i:]
+}
+// tAfterName is the context transition function for stateAfterName.
+func tAfterName(c context, s []byte) (context, []byte) {
// Look for the start of the value.
- i = eatWhiteSpace(s, i)
+ i := eatWhiteSpace(s, 0)
if i == len(s) {
- return c, s[i:]
- }
- if s[i] == '>' {
- state = elementContentType[c.element]
- return context{state: state, element: c.element}, s[i+1:]
+ return c, nil
} else if s[i] != '=' {
- // Possible due to a valueless attribute or '/' in "<input />".
+ // Occurs due to tag ending '>', and valueless attribute.
+ c.state = stateTag
return c, s[i:]
}
+ c.state = stateBeforeValue
// Consume the "=".
- i = eatWhiteSpace(s, i+1)
+ return c, s[i+1:]
+}
+var attrStartStates = [...]state{
+ attrNone: stateAttr,
+ attrScript: stateJS,
+ attrStyle: stateCSS,
+ attrURL: stateURL,
+}
+
+// tBeforeValue is the context transition function for stateBeforeValue.
+func tBeforeValue(c context, s []byte) (context, []byte) {
+ i := eatWhiteSpace(s, 0)
+ if i == len(s) {
+ return c, nil
+ }
// Find the attribute delimiter.
delim := delimSpaceOrTagEnd
- if i < len(s) {
- switch s[i] {
- case '\'':
- delim, i = delimSingleQuote, i+1
- case '"':
- delim, i = delimDoubleQuote, i+1
- }
+ switch s[i] {
+ case '\'':
+ delim, i = delimSingleQuote, i+1
+ case '"':
+ delim, i = delimDoubleQuote, i+1
}
-
- return context{state: state, delim: delim, element: c.element}, s[i:]
+ c.state, c.delim, c.attr = attrStartStates[c.attr], delim, attrNone
+ return c, s[i:]
}
// tComment is the context transition function for stateComment.