]> Cypherpunks repositories - gostls13.git/commitdiff
exp/template/html: Added handling for URL attributes.
authorMike Samuel <mikesamuel@gmail.com>
Tue, 30 Aug 2011 01:42:30 +0000 (11:42 +1000)
committerNigel Tao <nigeltao@golang.org>
Tue, 30 Aug 2011 01:42:30 +0000 (11:42 +1000)
1. adds a urlPart field to context
2. implements tURL to figure out the URL part
3. modifies joinContext to allow common context mismatches
   around branches to be ignored when not material as in
   <a href="/foo{{if .HasQuery}}?q={{.Query}}{{/if}}">
4. adds a pipeline function that filters dynamically inserted
   protocols to prevent code injection via URLs.

R=nigeltao
CC=golang-dev
https://golang.org/cl/4957041

src/pkg/exp/template/html/context.go
src/pkg/exp/template/html/escape.go
src/pkg/exp/template/html/escape_test.go

index 5ef3b78146ad3fee023d923328be7786af2a901e..d8fed158677402bf2e7044c931df954456c331de 100644 (file)
@@ -18,13 +18,14 @@ import (
 type context struct {
        state   state
        delim   delim
+       urlPart urlPart
        errLine int
        errStr  string
 }
 
 // eq returns whether two contexts are equal.
 func (c context) eq(d context) bool {
-       return c.state == d.state && c.delim == d.delim && c.errLine == d.errLine && c.errStr == d.errStr
+       return c.state == d.state && c.delim == d.delim && c.urlPart == d.urlPart && c.errLine == d.errLine && c.errStr == d.errStr
 }
 
 // state describes a high-level HTML parser state.
@@ -97,3 +98,36 @@ func (d delim) String() string {
        }
        return fmt.Sprintf("illegal delim %d", d)
 }
+
+// urlPart identifies a part in an RFC 3986 hierarchical URL to allow different
+// encoding strategies.
+type urlPart uint8
+
+const (
+       // urlPartNone occurs when not in a URL, or possibly at the start:
+       // ^ in "^http://auth/path?k=v#frag".
+       urlPartNone urlPart = iota
+       // urlPartPreQuery occurs in the scheme, authority, or path; between the
+       // ^s in "h^ttp://auth/path^?k=v#frag".
+       urlPartPreQuery
+       // urlPartQueryOrFrag occurs in the query portion between the ^s in
+       // "http://auth/path?^k=v#frag^".
+       urlPartQueryOrFrag
+       // urlPartUnknown occurs due to joining of contexts both before and after
+       // the query separator.
+       urlPartUnknown
+)
+
+var urlPartNames = [...]string{
+       urlPartNone:        "urlPartNone",
+       urlPartPreQuery:    "urlPartPreQuery",
+       urlPartQueryOrFrag: "urlPartQueryOrFrag",
+       urlPartUnknown:     "urlPartUnknown",
+}
+
+func (u urlPart) String() string {
+       if int(u) < len(urlPartNames) {
+               return urlPartNames[u]
+       }
+       return fmt.Sprintf("illegal urlPart %d", u)
+}
index 52d6323fae68688c92b50f741eb340df88f4428c..e7de81c4c6860910c7a210b06826bdfe1a94e7f9 100644 (file)
@@ -10,6 +10,7 @@ package html
 import (
        "bytes"
        "fmt"
+       "html"
        "os"
        "strings"
        "template"
@@ -26,9 +27,15 @@ func Escape(t *template.Template) (*template.Template, os.Error) {
        if c.state != stateText {
                return nil, fmt.Errorf("%s ends in a non-text context: %v", t.Name(), c)
        }
+       t.Funcs(funcMap)
        return t, nil
 }
 
+// funcMap maps command names to functions that render their inputs safe.
+var funcMap = template.FuncMap{
+       "exp_template_html_urlfilter": urlFilter,
+}
+
 // escape escapes a template node.
 func escape(c context, n parse.Node) context {
        switch n := n.(type) {
@@ -53,7 +60,22 @@ func escape(c context, n parse.Node) context {
 func escapeAction(c context, n *parse.ActionNode) context {
        sanitizer := "html"
        if c.state == stateURL {
-               sanitizer = "urlquery"
+               switch c.urlPart {
+               case urlPartNone:
+                       sanitizer = "exp_template_html_urlfilter"
+               case urlPartQueryOrFrag:
+                       sanitizer = "urlquery"
+               case urlPartPreQuery:
+                       // The default "html" works here.
+               case urlPartUnknown:
+                       return context{
+                               state:   stateError,
+                               errLine: n.Line,
+                               errStr:  fmt.Sprintf("%s appears in an ambiguous URL context", n),
+                       }
+               default:
+                       panic(c.urlPart.String())
+               }
        }
        // If the pipe already ends with the sanitizer, do not interfere.
        if m := len(n.Pipe.Cmds); m != 0 {
@@ -84,6 +106,15 @@ func join(a, b context, line int, nodeName string) context {
        if a.eq(b) {
                return a
        }
+
+       c := a
+       c.urlPart = b.urlPart
+       if c.eq(b) {
+               // The contexts differ only by urlPart.
+               c.urlPart = urlPartUnknown
+               return c
+       }
+
        return context{
                state:   stateError,
                errLine: line,
@@ -148,8 +179,15 @@ func escapeText(c context, s []byte) context {
                i := bytes.IndexAny(s, delimEnds[c.delim])
                if i == -1 {
                        // Remain inside the attribute.
-                       // TODO: Recurse to take into account grammars for
-                       // JS, CSS, URIs embedded in attrs once implemented.
+                       // Decode the value so non-HTML rules can easily handle
+                       //     <button onclick="alert(&quot;Hi!&quot;)">
+                       // without having to entity decode token boundaries.
+                       d := c.delim
+                       c.delim = delimNone
+                       c = escapeText(c, []byte(html.UnescapeString(string(s))))
+                       if c.state != stateError {
+                               c.delim = d
+                       }
                        return c
                }
                if c.delim != delimSpaceOrTagEnd {
@@ -249,10 +287,11 @@ func tAttr(c context, s []byte) (context, []byte) {
 
 // tURL is the context transition function for the URL state.
 func tURL(c context, s []byte) (context, []byte) {
-       // TODO: Look for query and fragment boundaries within a URL so we
-       // can %-encode actions in the query and fragment parts, HTML escape
-       // actions elsewhere, and filter any actions at the start that might
-       // inject a dangerous protocol such as "javascript:".
+       if bytes.IndexAny(s, "#?") >= 0 {
+               c.urlPart = urlPartQueryOrFrag
+       } else if c.urlPart == urlPartNone {
+               c.urlPart = urlPartPreQuery
+       }
        return c, nil
 }
 
@@ -338,3 +377,28 @@ var urlAttr = map[string]bool{
        "src":        true,
        "usemap":     true,
 }
+
+// urlFilter returns the HTML equivalent of its input unless it contains an
+// unsafe protocol in which case it defangs the entire URL.
+func urlFilter(args ...interface{}) string {
+       ok := false
+       var s string
+       if len(args) == 1 {
+               s, ok = args[0].(string)
+       }
+       if !ok {
+               s = fmt.Sprint(args...)
+       }
+       i := strings.IndexRune(s, ':')
+       if i >= 0 && strings.IndexRune(s[:i], '/') < 0 {
+               protocol := strings.ToLower(s[:i])
+               if protocol != "http" && protocol != "https" && protocol != "mailto" {
+                       // Return a value that someone investigating a bug
+                       // report can put into a search engine.
+                       return "#ZgotmplZ"
+               }
+       }
+       // TODO: Once we handle <style>#id { background: url({{.Img}}) }</style>
+       // we will need to stop this from HTML escaping and pipeline sanitizers.
+       return template.HTMLEscapeString(s)
+}
index 6bab507874026e2ca019689c70be88a9137a25fd..a911c7d8357b3791e0f6f1611f6bd9c9a2153f0f 100644 (file)
@@ -83,14 +83,64 @@ func TestEscape(t *testing.T) {
                        // in the obsolete "mark" rule in an appendix in RFC 3986 so can be
                        // safely encoded.
                        "constant",
-                       `<a href="{{"'a<b'"}}">`,
-                       `<a href="'a%3Cb'">`,
+                       `<a href="/search?q={{"'a<b'"}}">`,
+                       `<a href="/search?q='a%3Cb'">`,
                },
                {
                        "multipleAttrs",
                        "<a b=1 c={{.H}}>",
                        "<a b=1 c=&lt;Hello&gt;>",
                },
+               {
+                       "urlStartRel",
+                       `<a href='{{"/foo/bar?a=b&c=d"}}'>`,
+                       `<a href='/foo/bar?a=b&amp;c=d'>`,
+               },
+               {
+                       "urlStartAbsOk",
+                       `<a href='{{"http://example.com/foo/bar?a=b&c=d"}}'>`,
+                       `<a href='http://example.com/foo/bar?a=b&amp;c=d'>`,
+               },
+               {
+                       "protocolRelativeURLStart",
+                       `<a href='{{"//example.com:8000/foo/bar?a=b&c=d"}}'>`,
+                       `<a href='//example.com:8000/foo/bar?a=b&amp;c=d'>`,
+               },
+               {
+                       "pathRelativeURLStart",
+                       `<a href="{{"/javascript:80/foo/bar"}}">`,
+                       `<a href="/javascript:80/foo/bar">`,
+               },
+               {
+                       "dangerousURLStart",
+                       `<a href='{{"javascript:alert(%22pwned%22)"}}'>`,
+                       `<a href='#ZgotmplZ'>`,
+               },
+               {
+                       "urlPath",
+                       `<a href='http://{{"javascript:80"}}/foo'>`,
+                       `<a href='http://javascript:80/foo'>`,
+               },
+               {
+                       "urlQuery",
+                       `<a href='/search?q={{.H}}'>`,
+                       `<a href='/search?q=%3CHello%3E'>`,
+               },
+               {
+                       "urlFragment",
+                       `<a href='/faq#{{.H}}'>`,
+                       `<a href='/faq#%3CHello%3E'>`,
+               },
+               {
+                       "urlBranch",
+                       `<a href="{{if .F}}/foo?a=b{{else}}/bar{{end}}">`,
+                       `<a href="/bar">`,
+               },
+               {
+                       "urlBranchConflictMoot",
+                       `<a href="{{if .T}}/foo?a={{else}}/bar#{{end}}{{.C}}">`,
+                       `<a href="/foo?a=%3CCincinatti%3E">`,
+               },
        }
 
        for _, tc := range testCases {
@@ -181,6 +231,10 @@ func TestErrors(t *testing.T) {
                        "<a b=1 c={{.H}}",
                        "z ends in a non-text context: {stateAttr delimSpaceOrTagEnd",
                },
+               {
+                       `<a href="{{if .F}}/foo?a={{else}}/bar/{{end}}{{.H}}">`,
+                       "z:1: (action: [(command: [F=[H]])]) appears in an ambiguous URL context",
+               },
        }
 
        for _, tc := range testCases {
@@ -242,7 +296,7 @@ func TestEscapeText(t *testing.T) {
                },
                {
                        `<a href=x`,
-                       context{state: stateURL, delim: delimSpaceOrTagEnd},
+                       context{state: stateURL, delim: delimSpaceOrTagEnd, urlPart: urlPartPreQuery},
                },
                {
                        `<a href=x `,
@@ -278,19 +332,35 @@ func TestEscapeText(t *testing.T) {
                },
                {
                        `<a HREF='http:`,
-                       context{state: stateURL, delim: delimSingleQuote},
+                       context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery},
                },
                {
                        `<a Href='/`,
-                       context{state: stateURL, delim: delimSingleQuote},
+                       context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery},
                },
                {
                        `<a href='"`,
-                       context{state: stateURL, delim: delimSingleQuote},
+                       context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery},
                },
                {
                        `<a href="'`,
-                       context{state: stateURL, delim: delimDoubleQuote},
+                       context{state: stateURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+               },
+               {
+                       `<a href='&apos;`,
+                       context{state: stateURL, delim: delimSingleQuote, urlPart: urlPartPreQuery},
+               },
+               {
+                       `<a href="&quot;`,
+                       context{state: stateURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+               },
+               {
+                       `<a href="&#34;`,
+                       context{state: stateURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+               },
+               {
+                       `<a href=&quot;`,
+                       context{state: stateURL, delim: delimSpaceOrTagEnd, urlPart: urlPartPreQuery},
                },
                {
                        `<img alt="1">`,