type context struct {
state state // high-level HTML parser state
delim delim // delimiter that ends the attribute value being scanned (delimNone when there is none)
+ urlPart urlPart // which part of a URL is being scanned; only consulted in stateURL
errLine int // line of the offending node; set together with stateError
errStr string // human-readable error message; set together with stateError
}
// eq returns whether two contexts are equal, i.e. whether every field of c
// compares equal to the corresponding field of d.
func (c context) eq(d context) bool {
- return c.state == d.state && c.delim == d.delim && c.errLine == d.errLine && c.errStr == d.errStr
+ return c.state == d.state && c.delim == d.delim && c.urlPart == d.urlPart && c.errLine == d.errLine && c.errStr == d.errStr
}
// state describes a high-level HTML parser state.
}
return fmt.Sprintf("illegal delim %d", d)
}
+
+// urlPart identifies a part in an RFC 3986 hierarchical URL so that actions
+// appearing in different parts of a URL can use different encoding strategies.
+type urlPart uint8
+
+const (
+ // urlPartNone occurs when not in a URL, or possibly at the start:
+ // ^ in "^http://auth/path?k=v#frag".
+ urlPartNone urlPart = iota // zero value of urlPart
+ // urlPartPreQuery occurs in the scheme, authority, or path; between the
+ // ^s in "h^ttp://auth/path^?k=v#frag".
+ urlPartPreQuery
+ // urlPartQueryOrFrag occurs in the query or fragment portion; between the
+ // ^s in "http://auth/path?^k=v#frag^".
+ urlPartQueryOrFrag
+ // urlPartUnknown occurs when joining contexts that differ only in their
+ // urlPart, e.g. branches ending both before and after the query separator.
+ urlPartUnknown
+)
+
+var urlPartNames = [...]string{ // display names indexed by urlPart value; read by urlPart.String
+ urlPartNone: "urlPartNone",
+ urlPartPreQuery: "urlPartPreQuery",
+ urlPartQueryOrFrag: "urlPartQueryOrFrag",
+ urlPartUnknown: "urlPartUnknown",
+}
+
+func (u urlPart) String() string { // has the fmt.Stringer method signature
+ if int(u) < len(urlPartNames) { // u indexes an entry of urlPartNames
+ return urlPartNames[u]
+ }
+ return fmt.Sprintf("illegal urlPart %d", u) // out-of-range value: report it numerically
+}
import (
"bytes"
"fmt"
+ "html"
"os"
"strings"
"template"
if c.state != stateText {
return nil, fmt.Errorf("%s ends in a non-text context: %v", t.Name(), c)
}
+ t.Funcs(funcMap)
return t, nil
}
+// funcMap maps command names to functions that render their inputs safe; it is registered on the template via t.Funcs.
+var funcMap = template.FuncMap{
+ "exp_template_html_urlfilter": urlFilter, // name escapeAction selects for actions when urlPart is urlPartNone
+}
+
// escape escapes a template node.
func escape(c context, n parse.Node) context {
switch n := n.(type) {
func escapeAction(c context, n *parse.ActionNode) context {
sanitizer := "html"
if c.state == stateURL {
- sanitizer = "urlquery"
+ switch c.urlPart {
+ case urlPartNone:
+ sanitizer = "exp_template_html_urlfilter"
+ case urlPartQueryOrFrag:
+ sanitizer = "urlquery"
+ case urlPartPreQuery:
+ // The default "html" works here.
+ case urlPartUnknown:
+ return context{
+ state: stateError,
+ errLine: n.Line,
+ errStr: fmt.Sprintf("%s appears in an ambiguous URL context", n),
+ }
+ default:
+ panic(c.urlPart.String())
+ }
}
// If the pipe already ends with the sanitizer, do not interfere.
if m := len(n.Pipe.Cmds); m != 0 {
if a.eq(b) {
return a
}
+
+ c := a
+ c.urlPart = b.urlPart
+ if c.eq(b) {
+ // The contexts differ only by urlPart.
+ c.urlPart = urlPartUnknown
+ return c
+ }
+
return context{
state: stateError,
errLine: line,
i := bytes.IndexAny(s, delimEnds[c.delim])
if i == -1 {
// Remain inside the attribute.
- // TODO: Recurse to take into account grammars for
- // JS, CSS, URIs embedded in attrs once implemented.
+ // Decode the value so non-HTML rules can easily handle
+ // <button onclick="alert(&quot;Hi!&quot;)">
+ // without having to entity decode token boundaries.
+ d := c.delim
+ c.delim = delimNone
+ c = escapeText(c, []byte(html.UnescapeString(string(s))))
+ if c.state != stateError {
+ c.delim = d
+ }
return c
}
if c.delim != delimSpaceOrTagEnd {
// tURL is the context transition function for the URL state. It updates
// c.urlPart based on the text s and returns the updated context together
// with nil as its second result.
func tURL(c context, s []byte) (context, []byte) {
- // TODO: Look for query and fragment boundaries within a URL so we
- // can %-encode actions in the query and fragment parts, HTML escape
- // actions elsewhere, and filter any actions at the start that might
- // inject a dangerous protocol such as "javascript:".
+ if bytes.IndexAny(s, "#?") >= 0 { // s contains a query ('?') or fragment ('#') separator
+ c.urlPart = urlPartQueryOrFrag
+ } else if c.urlPart == urlPartNone { // no separator and no part recorded yet; any other urlPart is left unchanged
+ c.urlPart = urlPartPreQuery
+ }
return c, nil
}
"src": true,
"usemap": true,
}
+
+// urlFilter returns the HTML-escaped form of its input unless the input begins with a protocol (the text
+// before the first ':', containing no '/') other than http, https, or mailto, in which case it returns "#ZgotmplZ".
+func urlFilter(args ...interface{}) string {
+ ok := false
+ var s string
+ if len(args) == 1 {
+ s, ok = args[0].(string) // a lone string argument is used directly
+ }
+ if !ok {
+ s = fmt.Sprint(args...) // otherwise format all arguments into one string
+ }
+ i := strings.IndexRune(s, ':')
+ if i >= 0 && strings.IndexRune(s[:i], '/') < 0 { // a ':' preceded by a '/' is not treated as ending a protocol
+ protocol := strings.ToLower(s[:i]) // lower-cased so the comparison below is case-insensitive
+ if protocol != "http" && protocol != "https" && protocol != "mailto" {
+ // Return a value that someone investigating a bug
+ // report can put into a search engine.
+ return "#ZgotmplZ"
+ }
+ }
+ // TODO: Once we handle <style>#id { background: url({{.Img}}) }</style>
+ // we will need to stop this from HTML escaping and pipeline sanitizers.
+ return template.HTMLEscapeString(s)
+}
// in the obsolete "mark" rule in an appendix in RFC 3986 so can be
// safely encoded.
"constant",
- `<a href="{{"'a<b'"}}">`,
- `<a href="'a%3Cb'">`,
+ `<a href="/search?q={{"'a<b'"}}">`,
+ `<a href="/search?q='a%3Cb'">`,
},
{
"multipleAttrs",
"<a b=1 c={{.H}}>",
"<a b=1 c=<Hello>>",
},
+ {
+ "urlStartRel",
+ `<a href='{{"/foo/bar?a=b&c=d"}}'>`,
+ `<a href='/foo/bar?a=b&c=d'>`,
+ },
+ {
+ "urlStartAbsOk",
+ `<a href='{{"http://example.com/foo/bar?a=b&c=d"}}'>`,
+ `<a href='http://example.com/foo/bar?a=b&c=d'>`,
+ },
+ {
+ "protocolRelativeURLStart",
+ `<a href='{{"//example.com:8000/foo/bar?a=b&c=d"}}'>`,
+ `<a href='//example.com:8000/foo/bar?a=b&c=d'>`,
+ },
+ {
+ "pathRelativeURLStart",
+ `<a href="{{"/javascript:80/foo/bar"}}">`,
+ `<a href="/javascript:80/foo/bar">`,
+ },
+ {
+ "dangerousURLStart",
+ `<a href='{{"javascript:alert(%22pwned%22)"}}'>`,
+ `<a href='#ZgotmplZ'>`,
+ },
+ {
+ "urlPath",
+ `<a href='http://{{"javascript:80"}}/foo'>`,
+ `<a href='http://javascript:80/foo'>`,
+ },
+ {
+ "urlQuery",
+ `<a href='/search?q={{.H}}'>`,
+ `<a href='/search?q=%3CHello%3E'>`,
+ },
+ {
+ "urlFragment",
+ `<a href='/faq#{{.H}}'>`,
+ `<a href='/faq#%3CHello%3E'>`,
+ },
+ {
+ "urlBranch",
+ `<a href="{{if .F}}/foo?a=b{{else}}/bar{{end}}">`,
+ `<a href="/bar">`,
+ },
+ {
+ "urlBranchConflictMoot",
+ `<a href="{{if .T}}/foo?a={{else}}/bar#{{end}}{{.C}}">`,
+ `<a href="/foo?a=%3CCincinatti%3E">`,
+ },
}
for _, tc := range testCases {
"<a b=1 c={{.H}}",
"z ends in a non-text context: {stateAttr delimSpaceOrTagEnd",
},
+ {
+ `<a href="{{if .F}}/foo?a={{else}}/bar/{{end}}{{.H}}">`,
+ "z:1: (action: [(command: [F=[H]])]) appears in an ambiguous URL context",
+ },
}
for _, tc := range testCases {
},
{
`<a href=x`,
- context{state: stateURL, delim: delimSpaceOrTagEnd},
+ context{state: stateURL, delim: delimSpaceOrTagEnd, urlPart: urlPartPreQuery},
},
{
`<a href=x `,
},
{
`<a HREF='http:`,
- context{state: stateURL, delim: delimSingleQuote},
+ context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery},
},
{
`<a Href='/`,
- context{state: stateURL, delim: delimSingleQuote},
+ context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery},
},
{
`<a href='"`,
- context{state: stateURL, delim: delimSingleQuote},
+ context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery},
},
{
`<a href="'`,
- context{state: stateURL, delim: delimDoubleQuote},
+ context{state: stateURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a href=''`,
+ context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a href=""`,
+ context{state: stateURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a href=""`,
+ context{state: stateURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a href="`,
+ context{state: stateURL, delim: delimSpaceOrTagEnd, urlPart: urlPartPreQuery},
},
{
`<img alt="1">`,