From: Mike Samuel Date: Thu, 8 Sep 2011 21:18:20 +0000 (+1000) Subject: exp/template/html: autoescape actions in HTML style attributes. X-Git-Tag: weekly.2011-09-16~70 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=4670d9e6349119914c9a5a1b40ce32045f3c0061;p=gostls13.git exp/template/html: autoescape actions in HTML style attributes. This does not wire up once that CL - // has been merged. +// tLineCmt is the context transition function for //comment states. +func tLineCmt(c context, s []byte) (context, []byte) { + // TODO: look for + // ul.inlineList { list-style: none; padding:0 } + // ul.inlineList > li { display: inline } + // ul.inlineList > li:before { content: ", " } + // ul.inlineList > li:first-child:before { content: "" } + // + // + // (4) Attribute value selectors as in a[href="http://example.com/"] + // + // We conservatively treat all strings as URLs, but make some + // allowances to avoid confusion. + // + // In (1), our conservative assumption is justified. + // In (2), valid font names do not contain ':', '?', or '#', so our + // conservative assumption is fine since we will never transition past + // urlPartPreQuery. + // In (3), our protocol heuristic should not be tripped, and there + // should not be non-space content after a '?' or '#', so as long as + // we only %-encode RFC 3986 reserved characters we are ok. + // In (4), we should URL escape for URL attributes, and for others we + // have the attribute name available if our conservative assumption + // proves problematic for real code. + + for { + i := bytes.IndexAny(s, `("'/`) + if i == -1 { + return c, nil + } + switch s[i] { + case '(': + // Look for url to the left. + p := bytes.TrimRight(s[:i], "\t\n\f\r ") + if endsWithCSSKeyword(p, "url") { + q := bytes.TrimLeft(s[i+1:], "\t\n\f\r ") + switch { + case len(q) != 0 && q[0] == '"': + c.state, s = stateCSSDqURL, q[1:] + case len(q) != 0 && q[0] == '\'': + c.state, s = stateCSSSqURL, q[1:] + + default: + c.state, s = stateCSSURL, q + } + return c, s + } + case '/': + if i+1 < len(s) { + switch s[i+1] { + case '/': + c.state = stateCSSLineCmt + return c, s[i+2:] + case '*': + c.state = stateCSSBlockCmt + return c, s[i+2:] + } + } + case '"': + c.state = stateCSSDqStr + return c, s[i+1:] + case '\'': + c.state = stateCSSSqStr + return c, s[i+1:] + } + s = s[i+1:] + } + panic("unreachable") +} + +// tCSSStr is the context transition function for the CSS string and URL states. +func tCSSStr(c context, s []byte) (context, []byte) { + // TODO: look for = 0 && strings.IndexRune(s[:i], '/') < 0 { - protocol := strings.ToLower(s[:i]) - if protocol != "http" && protocol != "https" && protocol != "mailto" { - // Return a value that someone investigating a bug - // report can put into a search engine. - return "#ZgotmplZ" - } - } - // TODO: Once we handle - // we will need to stop this from HTML escaping and pipeline sanitizers. - return template.HTMLEscapeString(s) -} diff --git a/src/pkg/exp/template/html/escape_test.go b/src/pkg/exp/template/html/escape_test.go index 6f5ecf6ef3..3294323409 100644 --- a/src/pkg/exp/template/html/escape_test.go +++ b/src/pkg/exp/template/html/escape_test.go @@ -82,14 +82,9 @@ func TestEscape(t *testing.T) { "true", }, { - // TODO: Make sure the URL escaper escapes single quotes so it can - // be embedded in single quoted URI attributes and CSS url(...) - // constructs. Single quotes are reserved in URLs, but are only used - // in the obsolete "mark" rule in an appendix in RFC 3986 so can be - // safely encoded. "constant", ``, - ``, + ``, }, { "multipleAttrs", @@ -121,6 +116,11 @@ func TestEscape(t *testing.T) { ``, ``, }, + { + "nonHierURL", + `"}}>`, + ``, + }, { "urlPath", ``, @@ -129,12 +129,12 @@ func TestEscape(t *testing.T) { { "urlQuery", ``, - ``, + ``, }, { "urlFragment", ``, - ``, + ``, }, { "urlBranch", @@ -144,7 +144,7 @@ func TestEscape(t *testing.T) { { "urlBranchConflictMoot", ``, - ``, + ``, }, { "jsStrValue", @@ -192,6 +192,138 @@ func TestEscape(t *testing.T) { "