TARG=exp/template/html
GOFILES=\
context.go\
+ css.go\
escape.go\
+ html.go\
js.go\
+ url.go\
include ../../../../Make.pkg
stateJSBlockCmt
// stateJSLineCmt occurs inside a JavaScript // line comment.
stateJSLineCmt
+ // stateCSS occurs inside a <style> element or style attribute.
+ stateCSS
+ // stateCSSDqStr occurs inside a CSS double quoted string.
+ stateCSSDqStr
+ // stateCSSSqStr occurs inside a CSS single quoted string.
+ stateCSSSqStr
+ // stateCSSDqURL occurs inside a CSS double quoted url("...").
+ stateCSSDqURL
+ // stateCSSSqURL occurs inside a CSS single quoted url('...').
+ stateCSSSqURL
+ // stateCSSURL occurs inside a CSS unquoted url(...).
+ stateCSSURL
+ // stateCSSBlockCmt occurs inside a CSS /* block comment */.
+ stateCSSBlockCmt
+ // stateCSSLineCmt occurs inside a CSS // line comment.
+ stateCSSLineCmt
// stateError is an infectious error state outside any valid
// HTML/CSS/JS construct.
stateError
)
var stateNames = [...]string{
- stateText: "stateText",
- stateTag: "stateTag",
- stateAttr: "stateAttr",
- stateURL: "stateURL",
- stateJS: "stateJS",
- stateJSDqStr: "stateJSDqStr",
- stateJSSqStr: "stateJSSqStr",
- stateJSRegexp: "stateJSRegexp",
- stateJSBlockCmt: "stateJSBlockCmt",
- stateJSLineCmt: "stateJSLineCmt",
- stateError: "stateError",
+ stateText: "stateText",
+ stateTag: "stateTag",
+ stateAttr: "stateAttr",
+ stateURL: "stateURL",
+ stateJS: "stateJS",
+ stateJSDqStr: "stateJSDqStr",
+ stateJSSqStr: "stateJSSqStr",
+ stateJSRegexp: "stateJSRegexp",
+ stateJSBlockCmt: "stateJSBlockCmt",
+ stateJSLineCmt: "stateJSLineCmt",
+ stateCSS: "stateCSS",
+ stateCSSDqStr: "stateCSSDqStr",
+ stateCSSSqStr: "stateCSSSqStr",
+ stateCSSDqURL: "stateCSSDqURL",
+ stateCSSSqURL: "stateCSSSqURL",
+ stateCSSURL: "stateCSSURL",
+ stateCSSBlockCmt: "stateCSSBlockCmt",
+ stateCSSLineCmt: "stateCSSLineCmt",
+ stateError: "stateError",
}
func (s state) String() string {
// urlPartQueryOrFrag occurs in the query portion between the ^s in
// "http://auth/path?^k=v#frag^".
urlPartQueryOrFrag
- // urlPartUnknown occurs due to joining of contexts both before and after
- // the query separator.
+ // urlPartUnknown occurs due to joining of contexts both before and
+ // after the query separator.
urlPartUnknown
)
--- /dev/null
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+ "bytes"
+ "fmt"
+ "unicode"
+ "utf8"
+)
+
+// endsWithCSSKeyword returns whether b ends with an ident that
+// case-insensitively matches the lower-case kw.
+func endsWithCSSKeyword(b []byte, kw string) bool {
+ i := len(b) - len(kw)
+ if i < 0 {
+ // Too short.
+ return false
+ }
+ if i != 0 {
+ r, _ := utf8.DecodeLastRune(b[:i])
+ if isCSSNmchar(r) {
+ // Too long.
+ return false
+ }
+ }
+ // Many CSS keywords, such as "!important" can have characters encoded,
+ // but the URI production does not allow that according to
+ // http://www.w3.org/TR/css3-syntax/#TOK-URI
+ // This does not attempt to recognize encoded keywords. For example,
+ // given "\75\72\6c" and "url" this return false.
+ return string(bytes.ToLower(b[i:])) == kw
+}
+
+// isCSSNmchar returns whether rune is allowed anywhere in a CSS identifier.
+func isCSSNmchar(rune int) bool {
+ // Based on the CSS3 nmchar production but ignores multi-rune escape
+ // sequences.
+ // http://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
+ return 'a' <= rune && rune <= 'z' ||
+ 'A' <= rune && rune <= 'Z' ||
+ '0' <= rune && rune <= '9' ||
+ '-' == rune ||
+ '_' == rune ||
+ // Non-ASCII cases below.
+ 0x80 <= rune && rune <= 0xd7ff ||
+ 0xe000 <= rune && rune <= 0xfffd ||
+ 0x10000 <= rune && rune <= 0x10ffff
+}
+
+// decodeCSS decodes CSS3 escapes given a sequence of stringchars.
+// If there is no change, it returns the input, otherwise it returns a slice
+// backed by a new array.
+// http://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
+func decodeCSS(s []byte) []byte {
+ i := bytes.IndexByte(s, '\\')
+ if i == -1 {
+ return s
+ }
+ // The UTF-8 sequence for a codepoint is never longer than 1 + the
+ // number hex digits need to represent that codepoint, so len(s) is an
+ // upper bound on the output length.
+ b := make([]byte, 0, len(s))
+ for len(s) != 0 {
+ i := bytes.IndexByte(s, '\\')
+ if i == -1 {
+ i = len(s)
+ }
+ b, s = append(b, s[:i]...), s[i:]
+ if len(s) < 2 {
+ break
+ }
+ // http://www.w3.org/TR/css3-syntax/#SUBTOK-escape
+ // escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
+ if isHex(s[1]) {
+ // http://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
+ // unicode ::= '\' [0-9a-fA-F]{1,6} wc?
+ j := 2
+ for j < len(s) && j < 7 && isHex(s[j]) {
+ j++
+ }
+ rune := hexDecode(s[1:j])
+ if rune > unicode.MaxRune {
+ rune, j = rune/16, j-1
+ }
+ n := utf8.EncodeRune(b[len(b):cap(b)], rune)
+ // The optional space at the end allows a hex
+ // sequence to be followed by a literal hex.
+ // string(decodeCSS([]byte(`\A B`))) == "\nB"
+ b, s = b[:len(b)+n], skipCSSSpace(s[j:])
+ } else {
+ // `\\` decodes to `\` and `\"` to `"`.
+ _, n := utf8.DecodeRune(s[1:])
+ b, s = append(b, s[1:1+n]...), s[1+n:]
+ }
+ }
+ return b
+}
+
+// isHex returns whether the given character is a hex digit.
+func isHex(c byte) bool {
+ return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
+}
+
+// hexDecode decodes a short hex digit sequence: "10" -> 16.
+func hexDecode(s []byte) int {
+ n := 0
+ for _, c := range s {
+ n <<= 4
+ switch {
+ case '0' <= c && c <= '9':
+ n |= int(c - '0')
+ case 'a' <= c && c <= 'f':
+ n |= int(c-'a') + 10
+ case 'A' <= c && c <= 'F':
+ n |= int(c-'A') + 10
+ default:
+ panic(fmt.Sprintf("Bad hex digit in %q", s))
+ }
+ }
+ return n
+}
+
+// skipCSSSpace returns a suffix of c, skipping over a single space.
+func skipCSSSpace(c []byte) []byte {
+ if len(c) == 0 {
+ return c
+ }
+ // wc ::= #x9 | #xA | #xC | #xD | #x20
+ switch c[0] {
+ case '\t', '\n', '\f', ' ':
+ return c[1:]
+ case '\r':
+ // This differs from CSS3's wc production because it contains a
+ // probable spec error whereby wc contains all the single byte
+ // sequences in nl (newline) but not CRLF.
+ if len(c) >= 2 && c[1] == '\n' {
+ return c[2:]
+ }
+ return c[1:]
+ }
+ return c
+}
+
+// cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
+func cssEscaper(args ...interface{}) string {
+ s := stringify(args...)
+ var b bytes.Buffer
+ written := 0
+ for i, r := range s {
+ var repl string
+ switch r {
+ case 0:
+ repl = `\0`
+ case '\t':
+ repl = `\9`
+ case '\n':
+ repl = `\a`
+ case '\f':
+ repl = `\c`
+ case '\r':
+ repl = `\d`
+ // Encode HTML specials as hex so the output can be embedded
+ // in HTML attributes without further encoding.
+ case '"':
+ repl = `\22`
+ case '&':
+ repl = `\26`
+ case '\'':
+ repl = `\27`
+ case '(':
+ repl = `\28`
+ case ')':
+ repl = `\29`
+ case '+':
+ repl = `\2b`
+ case '/':
+ repl = `\2f`
+ case ':':
+ repl = `\3a`
+ case ';':
+ repl = `\3b`
+ case '<':
+ repl = `\3c`
+ case '>':
+ repl = `\3e`
+ case '\\':
+ repl = `\\`
+ case '{':
+ repl = `\7b`
+ case '}':
+ repl = `\7d`
+ default:
+ continue
+ }
+ b.WriteString(s[written:i])
+ b.WriteString(repl)
+ written = i + utf8.RuneLen(r)
+ if repl != `\\` && (written == len(s) || isHex(s[written])) {
+ b.WriteByte(' ')
+ }
+ }
+ if written == 0 {
+ return s
+ }
+ b.WriteString(s[written:])
+ return b.String()
+}
+
+var expressionBytes = []byte("expression")
+var mozBindingBytes = []byte("mozbinding")
+
+// cssValueFilter allows innocuous CSS values in the output including CSS
+// quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
+// (inherit, blue), and colors (#888).
+// It filters out unsafe values, such as those that affect token boundaries,
+// and anything that might execute scripts.
+func cssValueFilter(args ...interface{}) string {
+ s, id := decodeCSS([]byte(stringify(args...))), make([]byte, 0, 64)
+
+ // CSS3 error handling is specified as honoring string boundaries per
+ // http://www.w3.org/TR/css3-syntax/#error-handling :
+ // Malformed declarations. User agents must handle unexpected
+ // tokens encountered while parsing a declaration by reading until
+ // the end of the declaration, while observing the rules for
+ // matching pairs of (), [], {}, "", and '', and correctly handling
+ // escapes. For example, a malformed declaration may be missing a
+ // property, colon (:) or value.
+ // So we need to make sure that values do not have mismatched bracket
+ // or quote characters to prevent the browser from restarting parsing
+ // inside a string that might embed JavaScript source.
+ for i, c := range s {
+ switch c {
+ case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}':
+ return filterFailsafe
+ case '-':
+ // Disallow <!-- or -->.
+ // -- should not appear in valid identifiers.
+ if i != 0 && '-' == s[i-1] {
+ return filterFailsafe
+ }
+ default:
+ if c < 0x80 && isCSSNmchar(int(c)) {
+ id = append(id, c)
+ }
+ }
+ }
+ id = bytes.ToLower(id)
+ if bytes.Index(id, expressionBytes) != -1 || bytes.Index(id, mozBindingBytes) != -1 {
+ return filterFailsafe
+ }
+ return string(s)
+}
--- /dev/null
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+ "strconv"
+ "strings"
+ "testing"
+)
+
+func TestEndsWithCSSKeyword(t *testing.T) {
+ tests := []struct {
+ css, kw string
+ want bool
+ }{
+ {"", "url", false},
+ {"url", "url", true},
+ {"URL", "url", true},
+ {"Url", "url", true},
+ {"url", "important", false},
+ {"important", "important", true},
+ {"image-url", "url", false},
+ {"imageurl", "url", false},
+ {"image url", "url", true},
+ }
+ for _, test := range tests {
+ got := endsWithCSSKeyword([]byte(test.css), test.kw)
+ if got != test.want {
+ t.Errorf("want %t but got %t for css=%v, kw=%v", test.want, got, test.css, test.kw)
+ }
+ }
+}
+
+func TestIsCSSNmchar(t *testing.T) {
+ tests := []struct {
+ rune int
+ want bool
+ }{
+ {0, false},
+ {'0', true},
+ {'9', true},
+ {'A', true},
+ {'Z', true},
+ {'a', true},
+ {'z', true},
+ {'_', true},
+ {'-', true},
+ {':', false},
+ {';', false},
+ {' ', false},
+ {0x7f, false},
+ {0x80, true},
+ {0x1234, true},
+ {0xd800, false},
+ {0xdc00, false},
+ {0xfffe, false},
+ {0x10000, true},
+ {0x110000, false},
+ }
+ for _, test := range tests {
+ got := isCSSNmchar(test.rune)
+ if got != test.want {
+ t.Errorf("%q: want %t but got %t", string(test.rune), test.want, got)
+ }
+ }
+}
+
+func TestDecodeCSS(t *testing.T) {
+ tests := []struct {
+ css, want string
+ }{
+ {``, ``},
+ {`foo`, `foo`},
+ {`foo\`, `foo`},
+ {`foo\\`, `foo\`},
+ {`\`, ``},
+ {`\A`, "\n"},
+ {`\a`, "\n"},
+ {`\0a`, "\n"},
+ {`\00000a`, "\n"},
+ {`\000000a`, "\u0000a"},
+ {`\1234 5`, "\u1234" + "5"},
+ {`\1234\20 5`, "\u1234" + " 5"},
+ {`\1234\A 5`, "\u1234" + "\n5"},
+ {"\\1234\t5", "\u1234" + "5"},
+ {"\\1234\n5", "\u1234" + "5"},
+ {"\\1234\r\n5", "\u1234" + "5"},
+ {`\12345`, "\U00012345"},
+ {`\\`, `\`},
+ {`\\ `, `\ `},
+ {`\"`, `"`},
+ {`\'`, `'`},
+ {`\.`, `.`},
+ {`\. .`, `. .`},
+ {
+ `The \3c i\3equick\3c/i\3e,\d\A\3cspan style=\27 color:brown\27\3e brown\3c/span\3e fox jumps\2028over the \3c canine class=\22lazy\22 \3e dog\3c/canine\3e`,
+ "The <i>quick</i>,\r\n<span style='color:brown'>brown</span> fox jumps\u2028over the <canine class=\"lazy\">dog</canine>",
+ },
+ }
+ for _, test := range tests {
+ got := string(decodeCSS([]byte(test.css)))
+ if got != test.want {
+ t.Errorf("%q: want\n\t%q\nbut got\n\t%q", test.css, test.want, got)
+ }
+ }
+}
+
+func TestHexDecode(t *testing.T) {
+ for i := 0; i < 0x200000; i += 101 /* coprime with 16 */ {
+ s := strconv.Itob(i, 16)
+ if got := hexDecode([]byte(s)); got != i {
+ t.Errorf("%s: want %d but got %d", s, i, got)
+ }
+ s = strings.ToUpper(s)
+ if got := hexDecode([]byte(s)); got != i {
+ t.Errorf("%s: want %d but got %d", s, i, got)
+ }
+ }
+}
+
+func TestSkipCSSSpace(t *testing.T) {
+ tests := []struct {
+ css, want string
+ }{
+ {"", ""},
+ {"foo", "foo"},
+ {"\n", ""},
+ {"\r\n", ""},
+ {"\r", ""},
+ {"\t", ""},
+ {" ", ""},
+ {"\f", ""},
+ {" foo", "foo"},
+ {" foo", " foo"},
+ {`\20`, `\20`},
+ }
+ for _, test := range tests {
+ got := string(skipCSSSpace([]byte(test.css)))
+ if got != test.want {
+ t.Errorf("%q: want %q but got %q", test.css, test.want, got)
+ }
+ }
+}
+
+func TestCSSEscaper(t *testing.T) {
+ input := ("\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f" +
+ "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+ ` !"#$%&'()*+,-./` +
+ `0123456789:;<=>?` +
+ `@ABCDEFGHIJKLMNO` +
+ `PQRSTUVWXYZ[\]^_` +
+ "`abcdefghijklmno" +
+ "pqrstuvwxyz{|}~\x7f" +
+ "\u00A0\u0100\u2028\u2029\ufeff\U0001D11E")
+
+ want := ("\\0\x01\x02\x03\x04\x05\x06\x07" +
+ "\x08\\9\\a\x0b\\c\\d\x0E\x0F" +
+ "\x10\x11\x12\x13\x14\x15\x16\x17" +
+ "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+ ` !\22#$%\26\27\28\29*\2b,-.\2f ` +
+ `0123456789\3a\3b\3c=\3e?` +
+ `@ABCDEFGHIJKLMNO` +
+ `PQRSTUVWXYZ[\\]^_` +
+ "`abcdefghijklmno" +
+ `pqrstuvwxyz\7b|\7d~` + "\u007f" +
+ "\u00A0\u0100\u2028\u2029\ufeff\U0001D11E")
+
+ got := cssEscaper(input)
+ if got != want {
+ t.Errorf("encode: want\n\t%q\nbut got\n\t%q", want, got)
+ }
+
+ got = string(decodeCSS([]byte(got)))
+ if input != got {
+ t.Errorf("decode: want\n\t%q\nbut got\n\t%q", input, got)
+ }
+}
+
+func TestCSSValueFilter(t *testing.T) {
+ tests := []struct {
+ css, want string
+ }{
+ {"", ""},
+ {"foo", "foo"},
+ {"0", "0"},
+ {"0px", "0px"},
+ {"-5px", "-5px"},
+ {"1.25in", "1.25in"},
+ {"+.33em", "+.33em"},
+ {"100%", "100%"},
+ {"12.5%", "12.5%"},
+ {".foo", ".foo"},
+ {"#bar", "#bar"},
+ {"corner-radius", "corner-radius"},
+ {"-moz-corner-radius", "-moz-corner-radius"},
+ {"#000", "#000"},
+ {"#48f", "#48f"},
+ {"#123456", "#123456"},
+ {"U+00-FF, U+980-9FF", "U+00-FF, U+980-9FF"},
+ {"color: red", "color: red"},
+ {"<!--", "ZgotmplZ"},
+ {"-->", "ZgotmplZ"},
+ {"<![CDATA[", "ZgotmplZ"},
+ {"]]>", "ZgotmplZ"},
+ {"</style", "ZgotmplZ"},
+ {`"`, "ZgotmplZ"},
+ {`'`, "ZgotmplZ"},
+ {"`", "ZgotmplZ"},
+ {"\x00", "ZgotmplZ"},
+ {"/* foo */", "ZgotmplZ"},
+ {"//", "ZgotmplZ"},
+ {"[href=~", "ZgotmplZ"},
+ {"expression(alert(1337))", "ZgotmplZ"},
+ {"-expression(alert(1337))", "ZgotmplZ"},
+ {"expression", "ZgotmplZ"},
+ {"Expression", "ZgotmplZ"},
+ {"EXPRESSION", "ZgotmplZ"},
+ {"-moz-binding", "ZgotmplZ"},
+ {"-expr\x00ession(alert(1337))", "ZgotmplZ"},
+ {`-expr\0ession(alert(1337))`, "ZgotmplZ"},
+ {`-express\69on(alert(1337))`, "ZgotmplZ"},
+ {`-express\69 on(alert(1337))`, "ZgotmplZ"},
+ {`-exp\72 ession(alert(1337))`, "ZgotmplZ"},
+ {`-exp\52 ession(alert(1337))`, "ZgotmplZ"},
+ {`-exp\000052 ession(alert(1337))`, "ZgotmplZ"},
+ {`-expre\0000073sion`, "-expre\x073sion"},
+ {`@import url evil.css`, "ZgotmplZ"},
+ }
+ for _, test := range tests {
+ got := cssValueFilter(test.css)
+ if got != test.want {
+ t.Errorf("%q: want %q but got %q", test.css, test.want, got)
+ }
+ }
+}
+
+func BenchmarkCSSEscaper(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ cssEscaper("The <i>quick</i>,\r\n<span style='color:brown'>brown</span> fox jumps\u2028over the <canine class=\"lazy\">dog</canine>")
+ }
+}
+
+func BenchmarkCSSEscaperNoSpecials(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ cssEscaper("The quick, brown fox jumps over the lazy dog.")
+ }
+}
+
+func BenchmarkDecodeCSS(b *testing.B) {
+ s := []byte(`The \3c i\3equick\3c/i\3e,\d\A\3cspan style=\27 color:brown\27\3e brown\3c/span\3e fox jumps\2028over the \3c canine class=\22lazy\22 \3edog\3c/canine\3e`)
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ decodeCSS(s)
+ }
+}
+
+func BenchmarkDecodeCSSNoSpecials(b *testing.B) {
+ s := []byte("The quick, brown fox jumps over the lazy dog.")
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ decodeCSS(s)
+ }
+}
+
+func BenchmarkCSSValueFilter(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ cssValueFilter(` e\78preS\0Sio/**/n(alert(1337))`)
+ }
+}
+
+func BenchmarkCSSValueFilterOk(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ cssValueFilter(`Times New Roman`)
+ }
+}
// funcMap maps command names to functions that render their inputs safe.
var funcMap = template.FuncMap{
- "exp_template_html_urlfilter": urlFilter,
- "exp_template_html_jsvalescaper": jsValEscaper,
- "exp_template_html_jsstrescaper": jsStrEscaper,
+ "exp_template_html_cssescaper": cssEscaper,
+ "exp_template_html_cssvaluefilter": cssValueFilter,
"exp_template_html_jsregexpescaper": jsRegexpEscaper,
+ "exp_template_html_jsstrescaper": jsStrEscaper,
+ "exp_template_html_jsvalescaper": jsValEscaper,
+ "exp_template_html_nospaceescaper": htmlNospaceEscaper,
+ "exp_template_html_urlescaper": urlEscaper,
+ "exp_template_html_urlfilter": urlFilter,
+ "exp_template_html_urlnormalizer": urlNormalizer,
}
+// filterFailsafe is an innocuous word that is emitted in place of unsafe values
+// by sanitizer functions. It is not a keyword in any programming language,
+// contains no special characters, is not empty, and when it appears in output
+// it is distinct enough that a developer can find the source of the problem
+// via a search engine.
+const filterFailsafe = "ZgotmplZ"
+
// escape escapes a template node.
func escape(c context, n parse.Node) context {
switch n := n.(type) {
// escapeAction escapes an action template node.
func escapeAction(c context, n *parse.ActionNode) context {
- s := make([]string, 0, 2)
+ s := make([]string, 0, 3)
switch c.state {
- case stateURL:
+ case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL:
switch c.urlPart {
case urlPartNone:
s = append(s, "exp_template_html_urlfilter")
- case urlPartQueryOrFrag:
- s = append(s, "urlquery")
+ fallthrough
case urlPartPreQuery:
- s = append(s, "html")
+ switch c.state {
+ case stateCSSDqStr, stateCSSSqStr:
+ s = append(s, "exp_template_html_cssescaper")
+ case stateCSSDqURL, stateCSSSqURL, stateCSSURL:
+ s = append(s, "exp_template_html_urlnormalizer")
+ }
+ case urlPartQueryOrFrag:
+ s = append(s, "exp_template_html_urlescaper")
case urlPartUnknown:
return context{
state: stateError,
}
case stateJS:
s = append(s, "exp_template_html_jsvalescaper")
- if c.delim != delimNone {
- s = append(s, "html")
- }
case stateJSDqStr, stateJSSqStr:
s = append(s, "exp_template_html_jsstrescaper")
case stateJSRegexp:
s = append(s, "exp_template_html_jsregexpescaper")
- case stateJSBlockCmt, stateJSLineCmt:
+ case stateJSBlockCmt, stateJSLineCmt, stateCSSBlockCmt, stateCSSLineCmt:
return context{
state: stateError,
errLine: n.Line,
errStr: fmt.Sprintf("%s appears inside a comment", n),
}
+ case stateCSS:
+ s = append(s, "exp_template_html_cssvaluefilter")
+ case stateText:
+ s = append(s, "html")
+ }
+ switch c.delim {
+ case delimNone:
+ // No extra-escaping needed for raw text content.
+ case delimSpaceOrTagEnd:
+ s = append(s, "exp_template_html_nospaceescaper")
default:
s = append(s, "html")
}
// A transition function takes a context and template text input, and returns
// the updated context and any unconsumed text.
var transitionFunc = [...]func(context, []byte) (context, []byte){
- stateText: tText,
- stateTag: tTag,
- stateURL: tURL,
- stateJS: tJS,
- stateJSDqStr: tJSStr,
- stateJSSqStr: tJSStr,
- stateJSRegexp: tJSRegexp,
- stateJSBlockCmt: tJSBlockCmt,
- stateJSLineCmt: tJSLineCmt,
- stateAttr: tAttr,
- stateError: tError,
+ stateText: tText,
+ stateTag: tTag,
+ stateAttr: tAttr,
+ stateURL: tURL,
+ stateJS: tJS,
+ stateJSDqStr: tJSStr,
+ stateJSSqStr: tJSStr,
+ stateJSRegexp: tJSRegexp,
+ stateJSBlockCmt: tBlockCmt,
+ stateJSLineCmt: tLineCmt,
+ stateCSS: tCSS,
+ stateCSSDqStr: tCSSStr,
+ stateCSSSqStr: tCSSStr,
+ stateCSSDqURL: tCSSStr,
+ stateCSSSqURL: tCSSStr,
+ stateCSSURL: tCSSStr,
+ stateCSSBlockCmt: tBlockCmt,
+ stateCSSLineCmt: tLineCmt,
+ stateError: tError,
}
// tText is the context transition function for the text state.
state = stateURL
} else if strings.HasPrefix(canonAttrName, "on") {
state = stateJS
+ } else if canonAttrName == "style" {
+ state = stateCSS
}
// Look for the start of the value.
func tURL(c context, s []byte) (context, []byte) {
if bytes.IndexAny(s, "#?") >= 0 {
c.urlPart = urlPartQueryOrFrag
- } else if c.urlPart == urlPartNone {
+ } else if len(s) != 0 && c.urlPart == urlPartNone {
c.urlPart = urlPartPreQuery
}
return c, nil
var blockCommentEnd = []byte("*/")
-// tJSBlockCmt is the context transition function for the JS /*comment*/ state.
-func tJSBlockCmt(c context, s []byte) (context, []byte) {
- // TODO: delegate to tSpecialTagEnd to find any </script> once that CL
- // has been merged.
-
+// tBlockCmt is the context transition function for /*comment*/ states.
+func tBlockCmt(c context, s []byte) (context, []byte) {
+ // TODO: look for </script or </style end tags.
i := bytes.Index(s, blockCommentEnd)
if i == -1 {
return c, nil
}
- c.state = stateJS
+ switch c.state {
+ case stateJSBlockCmt:
+ c.state = stateJS
+ case stateCSSBlockCmt:
+ c.state = stateCSS
+ default:
+ panic(c.state.String())
+ }
return c, s[i+2:]
}
-// tJSLineCmt is the context transition function for the JS //comment state.
-func tJSLineCmt(c context, s []byte) (context, []byte) {
- // TODO: delegate to tSpecialTagEnd to find any </script> once that CL
- // has been merged.
+// tLineCmt is the context transition function for //comment states.
+func tLineCmt(c context, s []byte) (context, []byte) {
+ // TODO: look for </script or </style end tags.
+ var lineTerminators string
+ var endState state
+ switch c.state {
+ case stateJSLineCmt:
+ lineTerminators, endState = "\n\r\u2028\u2029", stateJS
+ case stateCSSLineCmt:
+ lineTerminators, endState = "\n\f\r", stateCSS
+ // Line comments are not part of any published CSS standard but
+ // are supported by the 4 major browsers.
+ // This defines line comments as
+ // LINECOMMENT ::= "//" [^\n\f\d]*
+ // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
+ // newlines:
+ // nl ::= #xA | #xD #xA | #xD | #xC
+ default:
+ panic(c.state.String())
+ }
- i := bytes.IndexAny(s, "\r\n\u2028\u2029")
+ i := bytes.IndexAny(s, lineTerminators)
if i == -1 {
return c, nil
}
- c.state = stateJS
+ c.state = endState
// Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
// "However, the LineTerminator at the end of the line is not
// considered to be part of the single-line comment; it is recognised
return c, s[i:]
}
+// tCSS is the context transition function for the CSS state.
+func tCSS(c context, s []byte) (context, []byte) {
+ // TODO: look for </style
+
+ // CSS quoted strings are almost never used except for:
+ // (1) URLs as in background: "/foo.png"
+ // (2) Multiword font-names as in font-family: "Times New Roman"
+ // (3) List separators in content values as in inline-lists:
+ // <style>
+ // ul.inlineList { list-style: none; padding:0 }
+ // ul.inlineList > li { display: inline }
+ // ul.inlineList > li:before { content: ", " }
+ // ul.inlineList > li:first-child:before { content: "" }
+ // </style>
+ // <ul class=inlineList><li>One<li>Two<li>Three</ul>
+ // (4) Attribute value selectors as in a[href="http://example.com/"]
+ //
+ // We conservatively treat all strings as URLs, but make some
+ // allowances to avoid confusion.
+ //
+ // In (1), our conservative assumption is justified.
+ // In (2), valid font names do not contain ':', '?', or '#', so our
+ // conservative assumption is fine since we will never transition past
+ // urlPartPreQuery.
+ // In (3), our protocol heuristic should not be tripped, and there
+ // should not be non-space content after a '?' or '#', so as long as
+ // we only %-encode RFC 3986 reserved characters we are ok.
+ // In (4), we should URL escape for URL attributes, and for others we
+ // have the attribute name available if our conservative assumption
+ // proves problematic for real code.
+
+ for {
+ i := bytes.IndexAny(s, `("'/`)
+ if i == -1 {
+ return c, nil
+ }
+ switch s[i] {
+ case '(':
+ // Look for url to the left.
+ p := bytes.TrimRight(s[:i], "\t\n\f\r ")
+ if endsWithCSSKeyword(p, "url") {
+ q := bytes.TrimLeft(s[i+1:], "\t\n\f\r ")
+ switch {
+ case len(q) != 0 && q[0] == '"':
+ c.state, s = stateCSSDqURL, q[1:]
+ case len(q) != 0 && q[0] == '\'':
+ c.state, s = stateCSSSqURL, q[1:]
+
+ default:
+ c.state, s = stateCSSURL, q
+ }
+ return c, s
+ }
+ case '/':
+ if i+1 < len(s) {
+ switch s[i+1] {
+ case '/':
+ c.state = stateCSSLineCmt
+ return c, s[i+2:]
+ case '*':
+ c.state = stateCSSBlockCmt
+ return c, s[i+2:]
+ }
+ }
+ case '"':
+ c.state = stateCSSDqStr
+ return c, s[i+1:]
+ case '\'':
+ c.state = stateCSSSqStr
+ return c, s[i+1:]
+ }
+ s = s[i+1:]
+ }
+ panic("unreachable")
+}
+
+// tCSSStr is the context transition function for the CSS string and URL states.
+func tCSSStr(c context, s []byte) (context, []byte) {
+ // TODO: look for </style
+
+ var endAndEsc string
+ switch c.state {
+ case stateCSSDqStr, stateCSSDqURL:
+ endAndEsc = `\"`
+ case stateCSSSqStr, stateCSSSqURL:
+ endAndEsc = `\'`
+ case stateCSSURL:
+ // Unquoted URLs end with a newline or close parenthesis.
+ // The below includes the wc (whitespace character) and nl.
+ endAndEsc = "\\\t\n\f\r )"
+ default:
+ panic(c.state.String())
+ }
+
+ b := s
+ for {
+ i := bytes.IndexAny(b, endAndEsc)
+ if i == -1 {
+ return tURL(c, decodeCSS(b))
+ }
+ if b[i] == '\\' {
+ i++
+ if i == len(b) {
+ return context{
+ state: stateError,
+ errStr: fmt.Sprintf("unfinished escape sequence in CSS string: %q", s),
+ }, nil
+ }
+ } else {
+ c.state = stateCSS
+ return c, b[i+1:]
+ }
+ c, _ = tURL(c, decodeCSS(b[:i+1]))
+ b = b[i+1:]
+ }
+ panic("unreachable")
+}
+
// tError is the context transition function for the error state.
func tError(c context, s []byte) (context, []byte) {
return c, nil
"src": true,
"usemap": true,
}
-
-// urlFilter returns the HTML equivalent of its input unless it contains an
-// unsafe protocol in which case it defangs the entire URL.
-func urlFilter(args ...interface{}) string {
- ok := false
- var s string
- if len(args) == 1 {
- s, ok = args[0].(string)
- }
- if !ok {
- s = fmt.Sprint(args...)
- }
- i := strings.IndexRune(s, ':')
- if i >= 0 && strings.IndexRune(s[:i], '/') < 0 {
- protocol := strings.ToLower(s[:i])
- if protocol != "http" && protocol != "https" && protocol != "mailto" {
- // Return a value that someone investigating a bug
- // report can put into a search engine.
- return "#ZgotmplZ"
- }
- }
- // TODO: Once we handle <style>#id { background: url({{.Img}}) }</style>
- // we will need to stop this from HTML escaping and pipeline sanitizers.
- return template.HTMLEscapeString(s)
-}
"true",
},
{
- // TODO: Make sure the URL escaper escapes single quotes so it can
- // be embedded in single quoted URI attributes and CSS url(...)
- // constructs. Single quotes are reserved in URLs, but are only used
- // in the obsolete "mark" rule in an appendix in RFC 3986 so can be
- // safely encoded.
"constant",
`<a href="/search?q={{"'a<b'"}}">`,
- `<a href="/search?q='a%3Cb'">`,
+ `<a href="/search?q=%27a%3cb%27">`,
},
{
"multipleAttrs",
`<a href='{{"javascript:alert(%22pwned%22)"}}'>`,
`<a href='#ZgotmplZ'>`,
},
+ {
+ "nonHierURL",
+ `<a href={{"mailto:Muhammed \"The Greatest\" Ali <m.ali@example.com>"}}>`,
+ `<a href=mailto:Muhammed "The Greatest" Ali <m.ali@example.com>>`,
+ },
{
"urlPath",
`<a href='http://{{"javascript:80"}}/foo'>`,
{
"urlQuery",
`<a href='/search?q={{.H}}'>`,
- `<a href='/search?q=%3CHello%3E'>`,
+ `<a href='/search?q=%3cHello%3e'>`,
},
{
"urlFragment",
`<a href='/faq#{{.H}}'>`,
- `<a href='/faq#%3CHello%3E'>`,
+ `<a href='/faq#%3cHello%3e'>`,
},
{
"urlBranch",
{
"urlBranchConflictMoot",
`<a href="{{if .T}}/foo?a={{else}}/bar#{{end}}{{.C}}">`,
- `<a href="/foo?a=%3CCincinatti%3E">`,
+ `<a href="/foo?a=%3cCincinatti%3e">`,
},
{
"jsStrValue",
"<button onclick='alert("{{.H}}")'>",
`<button onclick='alert("\x3cHello\x3e")'>`,
},
+ {
+ "styleBidiKeywordPassed",
+ `<p style="dir: {{"ltr"}}">`,
+ `<p style="dir: ltr">`,
+ },
+ {
+ "styleBidiPropNamePassed",
+ `<p style="border-{{"left"}}: 0; border-{{"right"}}: 1in">`,
+ `<p style="border-left: 0; border-right: 1in">`,
+ },
+ {
+ "styleExpressionBlocked",
+ `<p style="width: {{"expression(alert(1337))"}}">`,
+ `<p style="width: ZgotmplZ">`,
+ },
+ {
+ "styleTagSelectorPassed",
+ `<style>{{"p"}} { color: pink }</style>`,
+ `<style>p { color: pink }</style>`,
+ },
+ {
+ "styleIDPassed",
+ `<style>p{{"#my-ID"}} { font: Arial }`,
+ `<style>p#my-ID { font: Arial }`,
+ },
+ {
+ "styleClassPassed",
+ `<style>p{{".my_class"}} { font: Arial }`,
+ `<style>p.my_class { font: Arial }`,
+ },
+ {
+ "styleQuantityPassed",
+ `<a style="left: {{"2em"}}; top: {{0}}">`,
+ `<a style="left: 2em; top: 0">`,
+ },
+ {
+ "stylePctPassed",
+ `<table style=width:{{"100%"}}>`,
+ `<table style=width:100%>`,
+ },
+ {
+ "styleColorPassed",
+ `<p style="color: {{"#8ff"}}; background: {{"#000"}}">`,
+ `<p style="color: #8ff; background: #000">`,
+ },
+ {
+ "styleObfuscatedExpressionBlocked",
+ `<p style="width: {{" e\78preS\0Sio/**/n(alert(1337))"}}">`,
+ `<p style="width: ZgotmplZ">`,
+ },
+ {
+ "styleMozBindingBlocked",
+ `<p style="{{"-moz-binding(alert(1337))"}}: ...">`,
+ `<p style="ZgotmplZ: ...">`,
+ },
+ {
+ "styleObfuscatedMozBindingBlocked",
+ `<p style="{{" -mo\7a-B\0I/**/nding(alert(1337))"}}: ...">`,
+ `<p style="ZgotmplZ: ...">`,
+ },
+ {
+ "styleFontNameString",
+ `<p style='font-family: "{{"Times New Roman"}}"'>`,
+ `<p style='font-family: "Times New Roman"'>`,
+ },
+ {
+ "styleFontNameString",
+ `<p style='font-family: "{{"Times New Roman"}}", "{{"sans-serif"}}"'>`,
+ `<p style='font-family: "Times New Roman", "sans-serif"'>`,
+ },
+ {
+ "styleFontNameUnquoted",
+ `<p style='font-family: {{"Times New Roman"}}'>`,
+ `<p style='font-family: Times New Roman'>`,
+ },
+ {
+ "styleURLQueryEncoded",
+ `<p style="background: url(/img?name={{"O'Reilly Animal(1)<2>.png"}})">`,
+ `<p style="background: url(/img?name=O%27Reilly%20Animal%281%29%3c2%3e.png)">`,
+ },
+ {
+ "styleQuotedURLQueryEncoded",
+ `<p style="background: url('/img?name={{"O'Reilly Animal(1)<2>.png"}}')">`,
+ `<p style="background: url('/img?name=O%27Reilly%20Animal%281%29%3c2%3e.png')">`,
+ },
+ {
+ "styleStrQueryEncoded",
+ `<p style="background: '/img?name={{"O'Reilly Animal(1)<2>.png"}}'">`,
+ `<p style="background: '/img?name=O%27Reilly%20Animal%281%29%3c2%3e.png'">`,
+ },
+ {
+ "styleURLBadProtocolBlocked",
+ `<a style="background: url('{{"javascript:alert(1337)"}}')">`,
+ `<a style="background: url('#ZgotmplZ')">`,
+ },
+ {
+ "styleStrBadProtocolBlocked",
+ `<a style="background: '{{"javascript:alert(1337)"}}'">`,
+ `<a style="background: '#ZgotmplZ'">`,
+ },
+ {
+ "styleURLGoodProtocolPassed",
+ `<a style="background: url('{{"http://oreilly.com/O'Reilly Animals(1)<2>;{}.html"}}')">`,
+ `<a style="background: url('http://oreilly.com/O%27Reilly%20Animals%281%29%3c2%3e;%7b%7d.html')">`,
+ },
+ {
+ "styleStrGoodProtocolPassed",
+ `<a style="background: '{{"http://oreilly.com/O'Reilly Animals(1)<2>;{}.html"}}'">`,
+ `<a style="background: 'http\3a\2f\2foreilly.com\2fO\27Reilly Animals\28 1\29\3c 2\3e\3b\7b\7d.html'">`,
+ },
+ {
+ "styleURLMixedCase",
+ `<p style="background: URL(#{{.H}})">`,
+ `<p style="background: URL(#%3cHello%3e)">`,
+ },
+ {
+ "stylePropertyPairPassed",
+ `<a style='{{"color: red"}}'>`,
+ `<a style='color: red'>`,
+ },
+ {
+ "styleStrSpecialsEncoded",
+ `<a style="font-family: '{{"/**/'\";:// \\"}}', "{{"/**/'\";:// \\"}}"">`,
+ `<a style="font-family: '\2f**\2f\27\22\3b\3a\2f\2f \\', "\2f**\2f\27\22\3b\3a\2f\2f \\"">`,
+ },
+ {
+ "styleURLSpecialsEncoded",
+ // TODO: Find out what IE does with url(/*foo*/bar)
+ // FF, Chrome, and Safari seem to treat it as a URL.
+ `<a style="border-image: url({{"/**/'\";:// \\"}}), url("{{"/**/'\";:// \\"}}"), url('{{"/**/'\";:// \\"}}'), 'http://www.example.com/?q={{"/**/'\";:// \\"}}''">`,
+ `<a style="border-image: url(/**/%27%22;://%20%5c), url("/**/%27%22;://%20%5c"), url('/**/%27%22;://%20%5c'), 'http://www.example.com/?q=%2f%2a%2a%2f%27%22%3b%3a%2f%2f%20%5c''">`,
+ },
}
for _, test := range tests {
`unfinished JS regexp charset: "foo[\\]/"`,
},
{
- `<a onclick="/* alert({{.X}} */">`,
+ `<a onclick="/* alert({{.X}}) */">`,
`z:1: (action: [(command: [F=[X]])]) appears inside a comment`,
},
{
- `<a onclick="// alert({{.X}}">`,
+ `<a onclick="// alert({{.X}})">`,
+ `z:1: (action: [(command: [F=[X]])]) appears inside a comment`,
+ },
+ {
+ `<a style="/* color: {{.X}} */">`,
+ `z:1: (action: [(command: [F=[X]])]) appears inside a comment`,
+ },
+ {
+ `<a style="// color: {{.X}}">`,
`z:1: (action: [(command: [F=[X]])]) appears inside a comment`,
},
}
`<a onclick="/foo\/`,
context{state: stateJSRegexp, delim: delimDoubleQuote},
},
+ {
+ `<a onclick="/foo/`,
+ context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp},
+ },
+ {
+ `<input checked style="`,
+ context{state: stateCSS, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="//`,
+ context{state: stateCSSLineCmt, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="//</script>`,
+ context{state: stateCSSLineCmt, delim: delimDoubleQuote},
+ },
+ {
+ "<a style='//\n",
+ context{state: stateCSS, delim: delimSingleQuote},
+ },
+ {
+ "<a style='//\r",
+ context{state: stateCSS, delim: delimSingleQuote},
+ },
+ {
+ `<a style="/*`,
+ context{state: stateCSSBlockCmt, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="/*/`,
+ context{state: stateCSSBlockCmt, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="/**/`,
+ context{state: stateCSS, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="background: '`,
+ context{state: stateCSSSqStr, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="background: "`,
+ context{state: stateCSSDqStr, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="background: '/foo?img=`,
+ context{state: stateCSSSqStr, delim: delimDoubleQuote, urlPart: urlPartQueryOrFrag},
+ },
+ {
+ `<a style="background: '/`,
+ context{state: stateCSSSqStr, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a style="background: url("/`,
+ context{state: stateCSSDqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a style="background: url('/`,
+ context{state: stateCSSSqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a style="background: url('/)`,
+ context{state: stateCSSSqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a style="background: url('/ `,
+ context{state: stateCSSSqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a style="background: url(/`,
+ context{state: stateCSSURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+ },
+ {
+ `<a style="background: url( `,
+ context{state: stateCSSURL, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="background: url( /image?name=`,
+ context{state: stateCSSURL, delim: delimDoubleQuote, urlPart: urlPartQueryOrFrag},
+ },
+ {
+ `<a style="background: url(x)`,
+ context{state: stateCSS, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="background: url('x'`,
+ context{state: stateCSS, delim: delimDoubleQuote},
+ },
+ {
+ `<a style="background: url( x `,
+ context{state: stateCSS, delim: delimDoubleQuote},
+ },
}
for _, test := range tests {
--- /dev/null
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+ "bytes"
+ "fmt"
+ "utf8"
+)
+
+// htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
+func htmlNospaceEscaper(args ...interface{}) string {
+ s := stringify(args...)
+ // The set of runes escaped is the union of the HTML specials and
+ // those determined by running the JS below in browsers:
+
+ // <div id=d></div>
+ // <script>(function () {
+ // var a = [], d = document.getElementById("d"), i, c, s;
+ // for (i = 0; i < 0x10000; ++i) {
+ // c = String.fromCharCode(i);
+ // d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
+ // s = d.getElementsByTagName("SPAN")[0];
+ // if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
+ // }
+ // document.write(a.join(", "));
+ // })()</script>
+
+ var b bytes.Buffer
+ written := 0
+ for i, r := range s {
+ var repl string
+ switch r {
+ case 0:
+ // http://www.w3.org/TR/html5/tokenization.html#attribute-value-unquoted-state: "
+ // U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
+ // CHARACTER character to the current attribute's value.
+ // "
+ // and similarly
+ // http://www.w3.org/TR/html5/tokenization.html#before-attribute-value-state
+ repl = "\uFFFD"
+ case '\t':
+ repl = "	"
+ case '\n':
+ repl = " "
+ case '\v':
+ repl = ""
+ case '\f':
+ repl = ""
+ case '\r':
+ repl = " "
+ case ' ':
+ repl = " "
+ case '"':
+ repl = """
+ case '&':
+ repl = "&"
+ case '\'':
+ repl = "'"
+ case '+':
+ repl = "+"
+ case '<':
+ repl = "<"
+ case '=':
+ repl = "="
+ case '>':
+ repl = ">"
+ case '`':
+ // A parse error in the attribute value (unquoted) and
+ // before attribute value states.
+ // Treated as a quoting character by IE.
+ repl = "`"
+ default:
+ // IE does not allow the ranges below raw in attributes.
+ if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
+ b.WriteString(s[written:i])
+ b.WriteString("&#x")
+ b.WriteByte("0123456789abcdef"[r>>24])
+ b.WriteByte("0123456789abcdef"[r>>16&0xf])
+ b.WriteByte("0123456789abcdef"[r>>8&0xf])
+ b.WriteByte("0123456789abcdef"[r&0xf])
+ b.WriteByte(';')
+ fmt.Fprintf(&b, "&#x%x;", r)
+ written = i + utf8.RuneLen(r)
+ }
+ continue
+ }
+ b.WriteString(s[written:i])
+ b.WriteString(repl)
+ // Valid as long as we don't include any cases above in the
+ // 0x80-0xff range.
+ written = i + utf8.RuneLen(r)
+ }
+ if written == 0 {
+ return s
+ }
+ b.WriteString(s[written:])
+ return b.String()
+}
--- /dev/null
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+ "html"
+ "strings"
+ "testing"
+)
+
+func TestHTMLNospaceEscaper(t *testing.T) {
+ input := ("\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f" +
+ "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+ ` !"#$%&'()*+,-./` +
+ `0123456789:;<=>?` +
+ `@ABCDEFGHIJKLMNO` +
+ `PQRSTUVWXYZ[\]^_` +
+ "`abcdefghijklmno" +
+ "pqrstuvwxyz{|}~\x7f" +
+ "\u00A0\u0100\u2028\u2029\ufeff\U0001D11E")
+
+ want := ("\ufffd\x01\x02\x03\x04\x05\x06\x07" +
+ "\x08	  \x0E\x0F" +
+ "\x10\x11\x12\x13\x14\x15\x16\x17" +
+ "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+ ` !"#$%&'()*+,-./` +
+ `0123456789:;<=>?` +
+ `@ABCDEFGHIJKLMNO` +
+ `PQRSTUVWXYZ[\]^_` +
+ ``abcdefghijklmno` +
+ `pqrstuvwxyz{|}~` + "\u007f" +
+ "\u00A0\u0100\u2028\u2029\ufeff\U0001D11E")
+
+ got := htmlNospaceEscaper(input)
+ if got != want {
+ t.Errorf("encode: want\n\t%q\nbut got\n\t%q", want, got)
+ }
+
+ got, want = html.UnescapeString(got), strings.Replace(input, "\x00", "\ufffd", 1)
+ if want != got {
+ t.Errorf("decode: want\n\t%q\nbut got\n\t%q", want, got)
+ }
+}
+
+func BenchmarkHTMLNospaceEscaper(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ htmlNospaceEscaper("The <i>quick</i>,\r\n<span style='color:brown'>brown</span> fox jumps\u2028over the <canine class=\"lazy\">dog</canine>")
+ }
+}
+
+func BenchmarkHTMLNospaceEscaperNoSpecials(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ htmlNospaceEscaper("The_quick,_brown_fox_jumps_over_the_lazy_dog.")
+ }
+}
'}': `\}`,
}
-// isJSIdentPart is true if the given rune is a JS identifier part.
+// isJSIdentPart returns whether the given rune is a JS identifier part.
// It does not handle all the non-Latin letters, joiners, and combining marks,
// but it does handle every codepoint that can occur in a numeric literal or
// a keyword.
--- /dev/null
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+ "bytes"
+ "fmt"
+ "strings"
+)
+
+// urlFilter returns the HTML equivalent of its input unless it contains an
+// unsafe protocol in which case it defangs the entire URL.
+func urlFilter(args ...interface{}) string {
+ s := stringify(args...)
+ i := strings.IndexRune(s, ':')
+ if i >= 0 && strings.IndexRune(s[:i], '/') < 0 {
+ protocol := strings.ToLower(s[:i])
+ if protocol != "http" && protocol != "https" && protocol != "mailto" {
+ // Return a value that someone investigating a bug
+ // report can put into a search engine.
+ return "#" + filterFailsafe
+ }
+ }
+ // TODO: Once we handle <style>#id { background: url({{.Img}}) }</style>
+ // we will need to stop this from HTML escaping and pipeline sanitizers.
+ return s
+}
+
+// urlEscaper produces an output that can be embedded in a URL query.
+// The output can be embedded in an HTML attribute without further escaping.
+func urlEscaper(args ...interface{}) string {
+ return urlProcessor(false, args...)
+}
+
+// urlEscaper normalizes URL content so it can be embedded in a quote-delimited
+// string or parenthesis delimited url(...).
+// The normalizer does not encode all HTML specials. Specifically, it does not
+// encode '&' so correct embedding in an HTML attribute requires escaping of
+// '&' to '&'.
+func urlNormalizer(args ...interface{}) string {
+ return urlProcessor(true, args...)
+}
+
+// urlProcessor normalizes (when norm is true) or escapes its input to produce
+// a valid hierarchical or opaque URL part.
+func urlProcessor(norm bool, args ...interface{}) string {
+ s := stringify(args...)
+ var b bytes.Buffer
+ written := 0
+ // The byte loop below assumes that all URLs use UTF-8 as the
+ // content-encoding. This is similar to the URI to IRI encoding scheme
+ // defined in section 3.1 of RFC 3987, and behaves the same as the
+ // EcmaScript builtin encodeURIComponent.
+ // It should not cause any misencoding of URLs in pages with
+ // Content-type: text/html;charset=UTF-8.
+ for i, n := 0, len(s); i < n; i++ {
+ c := s[i]
+ switch c {
+ // Single quote and parens are sub-delims in RFC 3986, but we
+ // escape them so the output can be embedded in in single
+ // quoted attributes and unquoted CSS url(...) constructs.
+ // Single quotes are reserved in URLs, but are only used in
+ // the obsolete "mark" rule in an appendix in RFC 3986
+ // so can be safely encoded.
+ case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
+ if norm {
+ continue
+ }
+ // Unreserved according to RFC 3986 sec 2.3
+ // "For consistency, percent-encoded octets in the ranges of
+ // ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
+ // period (%2E), underscore (%5F), or tilde (%7E) should not be
+ // created by URI producers
+ case '-', '.', '_', '~':
+ continue
+ case '%':
+ // When normalizing do not re-encode valid escapes.
+ if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) {
+ continue
+ }
+ default:
+ // Unreserved according to RFC 3986 sec 2.3
+ if 'a' <= c && c <= 'z' {
+ continue
+ }
+ if 'A' <= c && c <= 'Z' {
+ continue
+ }
+ if '0' <= c && c <= '9' {
+ continue
+ }
+ }
+ b.WriteString(s[written:i])
+ fmt.Fprintf(&b, "%%%02x", c)
+ written = i + 1
+ }
+ if written == 0 {
+ return s
+ }
+ b.WriteString(s[written:])
+ return b.String()
+}
--- /dev/null
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+ "testing"
+)
+
+func TestURLNormalizer(t *testing.T) {
+ tests := []struct {
+ url, want string
+ }{
+ {"", ""},
+ {
+ "http://example.com:80/foo/bar?q=foo%20&bar=x+y#frag",
+ "http://example.com:80/foo/bar?q=foo%20&bar=x+y#frag",
+ },
+ {" ", "%20"},
+ {"%7c", "%7c"},
+ {"%7C", "%7C"},
+ {"%2", "%252"},
+ {"%", "%25"},
+ {"%z", "%25z"},
+ {"/foo|bar/%5c\u1234", "/foo%7cbar/%5c%e1%88%b4"},
+ }
+ for _, test := range tests {
+ if got := urlNormalizer(test.url); test.want != got {
+ t.Errorf("%q: want\n\t%q\nbut got\n\t%q", test.url, test.want, got)
+ }
+ if test.want != urlNormalizer(test.want) {
+ t.Errorf("not idempotent: %q", test.want)
+ }
+ }
+}
+
+func TestURLFilters(t *testing.T) {
+ input := ("\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f" +
+ "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+ ` !"#$%&'()*+,-./` +
+ `0123456789:;<=>?` +
+ `@ABCDEFGHIJKLMNO` +
+ `PQRSTUVWXYZ[\]^_` +
+ "`abcdefghijklmno" +
+ "pqrstuvwxyz{|}~\x7f" +
+ "\u00A0\u0100\u2028\u2029\ufeff\U0001D11E")
+
+ tests := []struct {
+ name string
+ escaper func(...interface{}) string
+ escaped string
+ }{
+ {
+ "urlEscaper",
+ urlEscaper,
+ "%00%01%02%03%04%05%06%07%08%09%0a%0b%0c%0d%0e%0f" +
+ "%10%11%12%13%14%15%16%17%18%19%1a%1b%1c%1d%1e%1f" +
+ "%20%21%22%23%24%25%26%27%28%29%2a%2b%2c-.%2f" +
+ "0123456789%3a%3b%3c%3d%3e%3f" +
+ "%40ABCDEFGHIJKLMNO" +
+ "PQRSTUVWXYZ%5b%5c%5d%5e_" +
+ "%60abcdefghijklmno" +
+ "pqrstuvwxyz%7b%7c%7d~%7f" +
+ "%c2%a0%c4%80%e2%80%a8%e2%80%a9%ef%bb%bf%f0%9d%84%9e",
+ },
+ {
+ "urlNormalizer",
+ urlNormalizer,
+ "%00%01%02%03%04%05%06%07%08%09%0a%0b%0c%0d%0e%0f" +
+ "%10%11%12%13%14%15%16%17%18%19%1a%1b%1c%1d%1e%1f" +
+ "%20!%22#$%25&%27%28%29*+,-./" +
+ "0123456789:;%3c=%3e?" +
+ "@ABCDEFGHIJKLMNO" +
+ "PQRSTUVWXYZ[%5c]%5e_" +
+ "%60abcdefghijklmno" +
+ "pqrstuvwxyz%7b%7c%7d~%7f" +
+ "%c2%a0%c4%80%e2%80%a8%e2%80%a9%ef%bb%bf%f0%9d%84%9e",
+ },
+ }
+
+ for _, test := range tests {
+ if s := test.escaper(input); s != test.escaped {
+ t.Errorf("%s: want\n\t%q\ngot\n\t%q", test.name, test.escaped, s)
+ continue
+ }
+ }
+}
+
+func BenchmarkURLEscaper(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ urlEscaper("http://example.com:80/foo?q=bar%20&baz=x+y#frag")
+ }
+}
+
+func BenchmarkURLEscaperNoSpecials(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ urlEscaper("TheQuickBrownFoxJumpsOverTheLazyDog.")
+ }
+}
+
+func BenchmarkURLNormalizer(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ urlNormalizer("The quick brown fox jumps over the lazy dog.\n")
+ }
+}
+
+func BenchmarkURLNormalizerNoSpecials(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ urlNormalizer("http://example.com:80/foo?q=bar%20&baz=x+y#frag")
+ }
+}