exp/template/html: autoescape actions in HTML style attributes.

author Mike Samuel <mikesamuel@gmail.com>

Thu, 8 Sep 2011 21:18:20 +0000 (07:18 +1000)

committer Nigel Tao <nigeltao@golang.org>

Thu, 8 Sep 2011 21:18:20 +0000 (07:18 +1000)
author Mike Samuel <mikesamuel@gmail.com>
Thu, 8 Sep 2011 21:18:20 +0000 (07:18 +1000)
committer Nigel Tao <nigeltao@golang.org>
Thu, 8 Sep 2011 21:18:20 +0000 (07:18 +1000)
diff --git a/src/pkg/exp/template/html/Makefile b/src/pkg/exp/template/html/Makefile

index 3a93bebc091e37728df87cc33db02767d4ceb5d7..cc346179ef6d7aca98ca39c2486c575de759b6c9 100644 (file)
--- a/src/pkg/exp/template/html/Makefile
+++ b/src/pkg/exp/template/html/Makefile
@@ -7,7 +7,10 @@ include ../../../../Make.inc
  TARG=exp/template/html
  GOFILES=\
         context.go\
+       css.go\
         escape.go\
+       html.go\
         js.go\
+       url.go\
  
  include ../../../../Make.pkg
diff --git a/src/pkg/exp/template/html/context.go b/src/pkg/exp/template/html/context.go

index 428b3d0b3af785261a5991fed7bd6cfa9c0f5c6f..1a3fb44a3eb7f492fe4cd151d3ff7cf0c02abf40 100644 (file)
--- a/src/pkg/exp/template/html/context.go
+++ b/src/pkg/exp/template/html/context.go
@@ -63,23 +63,47 @@ const (
         stateJSBlockCmt
         // stateJSLineCmt occurs inside a JavaScript // line comment.
         stateJSLineCmt
+       // stateCSS occurs inside a <style> element or style attribute.
+       stateCSS
+       // stateCSSDqStr occurs inside a CSS double quoted string.
+       stateCSSDqStr
+       // stateCSSSqStr occurs inside a CSS single quoted string.
+       stateCSSSqStr
+       // stateCSSDqURL occurs inside a CSS double quoted url("...").
+       stateCSSDqURL
+       // stateCSSSqURL occurs inside a CSS single quoted url('...').
+       stateCSSSqURL
+       // stateCSSURL occurs inside a CSS unquoted url(...).
+       stateCSSURL
+       // stateCSSBlockCmt occurs inside a CSS /* block comment */.
+       stateCSSBlockCmt
+       // stateCSSLineCmt occurs inside a CSS // line comment.
+       stateCSSLineCmt
         // stateError is an infectious error state outside any valid
         // HTML/CSS/JS construct.
         stateError
  )
  
  var stateNames = [...]string{
-       stateText:       "stateText",
-       stateTag:        "stateTag",
-       stateAttr:       "stateAttr",
-       stateURL:        "stateURL",
-       stateJS:         "stateJS",
-       stateJSDqStr:    "stateJSDqStr",
-       stateJSSqStr:    "stateJSSqStr",
-       stateJSRegexp:   "stateJSRegexp",
-       stateJSBlockCmt: "stateJSBlockCmt",
-       stateJSLineCmt:  "stateJSLineCmt",
-       stateError:      "stateError",
+       stateText:        "stateText",
+       stateTag:         "stateTag",
+       stateAttr:        "stateAttr",
+       stateURL:         "stateURL",
+       stateJS:          "stateJS",
+       stateJSDqStr:     "stateJSDqStr",
+       stateJSSqStr:     "stateJSSqStr",
+       stateJSRegexp:    "stateJSRegexp",
+       stateJSBlockCmt:  "stateJSBlockCmt",
+       stateJSLineCmt:   "stateJSLineCmt",
+       stateCSS:         "stateCSS",
+       stateCSSDqStr:    "stateCSSDqStr",
+       stateCSSSqStr:    "stateCSSSqStr",
+       stateCSSDqURL:    "stateCSSDqURL",
+       stateCSSSqURL:    "stateCSSSqURL",
+       stateCSSURL:      "stateCSSURL",
+       stateCSSBlockCmt: "stateCSSBlockCmt",
+       stateCSSLineCmt:  "stateCSSLineCmt",
+       stateError:       "stateError",
  }
  
  func (s state) String() string {
@@ -132,8 +156,8 @@ const (
         // urlPartQueryOrFrag occurs in the query portion between the ^s in
         // "http://auth/path?^k=v#frag^".
         urlPartQueryOrFrag
-       // urlPartUnknown occurs due to joining of contexts both before and after
-       // the query separator.
+       // urlPartUnknown occurs due to joining of contexts both before and
+       // after the query separator.
         urlPartUnknown
  )
  
diff --git a/src/pkg/exp/template/html/css.go b/src/pkg/exp/template/html/css.go

new file mode 100644 (file)

index 0000000..79c603f
--- /dev/null
+++ b/src/pkg/exp/template/html/css.go
@@ -0,0 +1,255 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "bytes"
+       "fmt"
+       "unicode"
+       "utf8"
+)
+
+// endsWithCSSKeyword returns whether b ends with an ident that
+// case-insensitively matches the lower-case kw.
+func endsWithCSSKeyword(b []byte, kw string) bool {
+       i := len(b) - len(kw)
+       if i < 0 {
+               // Too short.
+               return false
+       }
+       if i != 0 {
+               r, _ := utf8.DecodeLastRune(b[:i])
+               if isCSSNmchar(r) {
+                       // Too long.
+                       return false
+               }
+       }
+       // Many CSS keywords, such as "!important" can have characters encoded,
+       // but the URI production does not allow that according to
+       // http://www.w3.org/TR/css3-syntax/#TOK-URI
+       // This does not attempt to recognize encoded keywords. For example,
+       // given "\75\72\6c" and "url" this returns false.
+       return string(bytes.ToLower(b[i:])) == kw
+}
+
+// isCSSNmchar returns whether rune is allowed anywhere in a CSS identifier.
+func isCSSNmchar(rune int) bool {
+       // Based on the CSS3 nmchar production but ignores multi-rune escape
+       // sequences.
+       // http://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
+       return 'a' <= rune && rune <= 'z' ||
+               'A' <= rune && rune <= 'Z' ||
+               '0' <= rune && rune <= '9' ||
+               '-' == rune ||
+               '_' == rune ||
+               // Non-ASCII cases below.
+               0x80 <= rune && rune <= 0xd7ff ||
+               0xe000 <= rune && rune <= 0xfffd ||
+               0x10000 <= rune && rune <= 0x10ffff
+}
+
+// decodeCSS decodes CSS3 escapes given a sequence of stringchars.
+// If there is no change, it returns the input, otherwise it returns a slice
+// backed by a new array.
+// http://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
+func decodeCSS(s []byte) []byte {
+       i := bytes.IndexByte(s, '\\')
+       if i == -1 {
+               return s
+       }
+       // The UTF-8 sequence for a codepoint is never longer than 1 + the
+       // number of hex digits needed to represent that codepoint, so len(s)
+       // is an upper bound on the output length.
+       b := make([]byte, 0, len(s))
+       for len(s) != 0 {
+               i := bytes.IndexByte(s, '\\')
+               if i == -1 {
+                       i = len(s)
+               }
+               b, s = append(b, s[:i]...), s[i:]
+               if len(s) < 2 {
+                       break
+               }
+               // http://www.w3.org/TR/css3-syntax/#SUBTOK-escape
+               // escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
+               if isHex(s[1]) {
+                       // http://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
+                       //   unicode ::= '\' [0-9a-fA-F]{1,6} wc?
+                       j := 2
+                       for j < len(s) && j < 7 && isHex(s[j]) {
+                               j++
+                       }
+                       rune := hexDecode(s[1:j])
+                       if rune > unicode.MaxRune {
+                               rune, j = rune/16, j-1
+                       }
+                       n := utf8.EncodeRune(b[len(b):cap(b)], rune)
+                       // The optional space at the end allows a hex
+                       // sequence to be followed by a literal hex digit.
+                       // string(decodeCSS([]byte(`\A B`))) == "\nB"
+                       b, s = b[:len(b)+n], skipCSSSpace(s[j:])
+               } else {
+                       // `\\` decodes to `\` and `\"` to `"`.
+                       _, n := utf8.DecodeRune(s[1:])
+                       b, s = append(b, s[1:1+n]...), s[1+n:]
+               }
+       }
+       return b
+}
+
+// isHex returns whether the given character is a hex digit.
+func isHex(c byte) bool {
+       return '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F'
+}
+
+// hexDecode decodes a short hex digit sequence: "10" -> 16.
+func hexDecode(s []byte) int {
+       n := 0
+       for _, c := range s {
+               n <<= 4
+               switch {
+               case '0' <= c && c <= '9':
+                       n |= int(c - '0')
+               case 'a' <= c && c <= 'f':
+                       n |= int(c-'a') + 10
+               case 'A' <= c && c <= 'F':
+                       n |= int(c-'A') + 10
+               default:
+                       panic(fmt.Sprintf("Bad hex digit in %q", s))
+               }
+       }
+       return n
+}
+
+// skipCSSSpace returns a suffix of c, skipping over a single space.
+func skipCSSSpace(c []byte) []byte {
+       if len(c) == 0 {
+               return c
+       }
+       // wc ::= #x9 | #xA | #xC | #xD | #x20
+       switch c[0] {
+       case '\t', '\n', '\f', ' ':
+               return c[1:]
+       case '\r':
+               // This differs from CSS3's wc production, which contains a
+               // probable spec error whereby wc contains all the single byte
+               // sequences in nl (newline) but not CRLF.
+               if len(c) >= 2 && c[1] == '\n' {
+                       return c[2:]
+               }
+               return c[1:]
+       }
+       return c
+}
+
+// cssEscaper escapes HTML and CSS special characters using \<hex>+ escapes.
+func cssEscaper(args ...interface{}) string {
+       s := stringify(args...)
+       var b bytes.Buffer
+       written := 0
+       for i, r := range s {
+               var repl string
+               switch r {
+               case 0:
+                       repl = `\0`
+               case '\t':
+                       repl = `\9`
+               case '\n':
+                       repl = `\a`
+               case '\f':
+                       repl = `\c`
+               case '\r':
+                       repl = `\d`
+               // Encode HTML specials as hex so the output can be embedded
+               // in HTML attributes without further encoding.
+               case '"':
+                       repl = `\22`
+               case '&':
+                       repl = `\26`
+               case '\'':
+                       repl = `\27`
+               case '(':
+                       repl = `\28`
+               case ')':
+                       repl = `\29`
+               case '+':
+                       repl = `\2b`
+               case '/':
+                       repl = `\2f`
+               case ':':
+                       repl = `\3a`
+               case ';':
+                       repl = `\3b`
+               case '<':
+                       repl = `\3c`
+               case '>':
+                       repl = `\3e`
+               case '\\':
+                       repl = `\\`
+               case '{':
+                       repl = `\7b`
+               case '}':
+                       repl = `\7d`
+               default:
+                       continue
+               }
+               b.WriteString(s[written:i])
+               b.WriteString(repl)
+               written = i + utf8.RuneLen(r)
+               if repl != `\\` && (written == len(s) || isHex(s[written])) {
+                       b.WriteByte(' ')
+               }
+       }
+       if written == 0 {
+               return s
+       }
+       b.WriteString(s[written:])
+       return b.String()
+}
+
+var expressionBytes = []byte("expression")
+var mozBindingBytes = []byte("mozbinding")
+
+// cssValueFilter allows innocuous CSS values in the output including CSS
+// quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
+// (inherit, blue), and colors (#888).
+// It filters out unsafe values, such as those that affect token boundaries,
+// and anything that might execute scripts.
+func cssValueFilter(args ...interface{}) string {
+       s, id := decodeCSS([]byte(stringify(args...))), make([]byte, 0, 64)
+
+       // CSS3 error handling is specified as honoring string boundaries per
+       // http://www.w3.org/TR/css3-syntax/#error-handling :
+       //     Malformed declarations. User agents must handle unexpected
+       //     tokens encountered while parsing a declaration by reading until
+       //     the end of the declaration, while observing the rules for
+       //     matching pairs of (), [], {}, "", and '', and correctly handling
+       //     escapes. For example, a malformed declaration may be missing a
+       //     property, colon (:) or value.
+       // So we need to make sure that values do not have mismatched bracket
+       // or quote characters to prevent the browser from restarting parsing
+       // inside a string that might embed JavaScript source.
+       for i, c := range s {
+               switch c {
+               case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}':
+                       return filterFailsafe
+               case '-':
+                       // Disallow <!-- or -->.
+                       // -- should not appear in valid identifiers.
+                       if i != 0 && '-' == s[i-1] {
+                               return filterFailsafe
+                       }
+               default:
+                       if c < 0x80 && isCSSNmchar(int(c)) {
+                               id = append(id, c)
+                       }
+               }
+       }
+       id = bytes.ToLower(id)
+       if bytes.Index(id, expressionBytes) != -1 || bytes.Index(id, mozBindingBytes) != -1 {
+               return filterFailsafe
+       }
+       return string(s)
+}
diff --git a/src/pkg/exp/template/html/css_test.go b/src/pkg/exp/template/html/css_test.go

new file mode 100644 (file)

index 0000000..5ba3e77
--- /dev/null
+++ b/src/pkg/exp/template/html/css_test.go
@@ -0,0 +1,277 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "strconv"
+       "strings"
+       "testing"
+)
+
+func TestEndsWithCSSKeyword(t *testing.T) {
+       tests := []struct {
+               css, kw string
+               want    bool
+       }{
+               {"", "url", false},
+               {"url", "url", true},
+               {"URL", "url", true},
+               {"Url", "url", true},
+               {"url", "important", false},
+               {"important", "important", true},
+               {"image-url", "url", false},
+               {"imageurl", "url", false},
+               {"image url", "url", true},
+       }
+       for _, test := range tests {
+               got := endsWithCSSKeyword([]byte(test.css), test.kw)
+               if got != test.want {
+                       t.Errorf("want %t but got %t for css=%v, kw=%v", test.want, got, test.css, test.kw)
+               }
+       }
+}
+
+func TestIsCSSNmchar(t *testing.T) {
+       tests := []struct {
+               rune int
+               want bool
+       }{
+               {0, false},
+               {'0', true},
+               {'9', true},
+               {'A', true},
+               {'Z', true},
+               {'a', true},
+               {'z', true},
+               {'_', true},
+               {'-', true},
+               {':', false},
+               {';', false},
+               {' ', false},
+               {0x7f, false},
+               {0x80, true},
+               {0x1234, true},
+               {0xd800, false},
+               {0xdc00, false},
+               {0xfffe, false},
+               {0x10000, true},
+               {0x110000, false},
+       }
+       for _, test := range tests {
+               got := isCSSNmchar(test.rune)
+               if got != test.want {
+                       t.Errorf("%q: want %t but got %t", string(test.rune), test.want, got)
+               }
+       }
+}
+
+func TestDecodeCSS(t *testing.T) {
+       tests := []struct {
+               css, want string
+       }{
+               {``, ``},
+               {`foo`, `foo`},
+               {`foo\`, `foo`},
+               {`foo\\`, `foo\`},
+               {`\`, ``},
+               {`\A`, "\n"},
+               {`\a`, "\n"},
+               {`\0a`, "\n"},
+               {`\00000a`, "\n"},
+               {`\000000a`, "\u0000a"},
+               {`\1234 5`, "\u1234" + "5"},
+               {`\1234\20 5`, "\u1234" + " 5"},
+               {`\1234\A 5`, "\u1234" + "\n5"},
+               {"\\1234\t5", "\u1234" + "5"},
+               {"\\1234\n5", "\u1234" + "5"},
+               {"\\1234\r\n5", "\u1234" + "5"},
+               {`\12345`, "\U00012345"},
+               {`\\`, `\`},
+               {`\\ `, `\ `},
+               {`\"`, `"`},
+               {`\'`, `'`},
+               {`\.`, `.`},
+               {`\. .`, `. .`},
+               {
+                       `The \3c i\3equick\3c/i\3e,\d\A\3cspan style=\27 color:brown\27\3e brown\3c/span\3e  fox jumps\2028over the \3c canine class=\22lazy\22 \3e dog\3c/canine\3e`,
+                       "The <i>quick</i>,\r\n<span style='color:brown'>brown</span> fox jumps\u2028over the <canine class=\"lazy\">dog</canine>",
+               },
+       }
+       for _, test := range tests {
+               got := string(decodeCSS([]byte(test.css)))
+               if got != test.want {
+                       t.Errorf("%q: want\n\t%q\nbut got\n\t%q", test.css, test.want, got)
+               }
+       }
+}
+
+func TestHexDecode(t *testing.T) {
+       for i := 0; i < 0x200000; i += 101 /* coprime with 16 */ {
+               s := strconv.Itob(i, 16)
+               if got := hexDecode([]byte(s)); got != i {
+                       t.Errorf("%s: want %d but got %d", s, i, got)
+               }
+               s = strings.ToUpper(s)
+               if got := hexDecode([]byte(s)); got != i {
+                       t.Errorf("%s: want %d but got %d", s, i, got)
+               }
+       }
+}
+
+func TestSkipCSSSpace(t *testing.T) {
+       tests := []struct {
+               css, want string
+       }{
+               {"", ""},
+               {"foo", "foo"},
+               {"\n", ""},
+               {"\r\n", ""},
+               {"\r", ""},
+               {"\t", ""},
+               {" ", ""},
+               {"\f", ""},
+               {" foo", "foo"},
+               {"  foo", " foo"},
+               {`\20`, `\20`},
+       }
+       for _, test := range tests {
+               got := string(skipCSSSpace([]byte(test.css)))
+               if got != test.want {
+                       t.Errorf("%q: want %q but got %q", test.css, test.want, got)
+               }
+       }
+}
+
+func TestCSSEscaper(t *testing.T) {
+       input := ("\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f" +
+               "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+               ` !"#$%&'()*+,-./` +
+               `0123456789:;<=>?` +
+               `@ABCDEFGHIJKLMNO` +
+               `PQRSTUVWXYZ[\]^_` +
+               "`abcdefghijklmno" +
+               "pqrstuvwxyz{|}~\x7f" +
+               "\u00A0\u0100\u2028\u2029\ufeff\U0001D11E")
+
+       want := ("\\0\x01\x02\x03\x04\x05\x06\x07" +
+               "\x08\\9\\a\x0b\\c\\d\x0E\x0F" +
+               "\x10\x11\x12\x13\x14\x15\x16\x17" +
+               "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+               ` !\22#$%\26\27\28\29*\2b,-.\2f ` +
+               `0123456789\3a\3b\3c=\3e?` +
+               `@ABCDEFGHIJKLMNO` +
+               `PQRSTUVWXYZ[\\]^_` +
+               "`abcdefghijklmno" +
+               `pqrstuvwxyz\7b|\7d~` + "\u007f" +
+               "\u00A0\u0100\u2028\u2029\ufeff\U0001D11E")
+
+       got := cssEscaper(input)
+       if got != want {
+               t.Errorf("encode: want\n\t%q\nbut got\n\t%q", want, got)
+       }
+
+       got = string(decodeCSS([]byte(got)))
+       if input != got {
+               t.Errorf("decode: want\n\t%q\nbut got\n\t%q", input, got)
+       }
+}
+
+func TestCSSValueFilter(t *testing.T) {
+       tests := []struct {
+               css, want string
+       }{
+               {"", ""},
+               {"foo", "foo"},
+               {"0", "0"},
+               {"0px", "0px"},
+               {"-5px", "-5px"},
+               {"1.25in", "1.25in"},
+               {"+.33em", "+.33em"},
+               {"100%", "100%"},
+               {"12.5%", "12.5%"},
+               {".foo", ".foo"},
+               {"#bar", "#bar"},
+               {"corner-radius", "corner-radius"},
+               {"-moz-corner-radius", "-moz-corner-radius"},
+               {"#000", "#000"},
+               {"#48f", "#48f"},
+               {"#123456", "#123456"},
+               {"U+00-FF, U+980-9FF", "U+00-FF, U+980-9FF"},
+               {"color: red", "color: red"},
+               {"<!--", "ZgotmplZ"},
+               {"-->", "ZgotmplZ"},
+               {"<![CDATA[", "ZgotmplZ"},
+               {"]]>", "ZgotmplZ"},
+               {"</style", "ZgotmplZ"},
+               {`"`, "ZgotmplZ"},
+               {`'`, "ZgotmplZ"},
+               {"`", "ZgotmplZ"},
+               {"\x00", "ZgotmplZ"},
+               {"/* foo */", "ZgotmplZ"},
+               {"//", "ZgotmplZ"},
+               {"[href=~", "ZgotmplZ"},
+               {"expression(alert(1337))", "ZgotmplZ"},
+               {"-expression(alert(1337))", "ZgotmplZ"},
+               {"expression", "ZgotmplZ"},
+               {"Expression", "ZgotmplZ"},
+               {"EXPRESSION", "ZgotmplZ"},
+               {"-moz-binding", "ZgotmplZ"},
+               {"-expr\x00ession(alert(1337))", "ZgotmplZ"},
+               {`-expr\0ession(alert(1337))`, "ZgotmplZ"},
+               {`-express\69on(alert(1337))`, "ZgotmplZ"},
+               {`-express\69 on(alert(1337))`, "ZgotmplZ"},
+               {`-exp\72 ession(alert(1337))`, "ZgotmplZ"},
+               {`-exp\52 ession(alert(1337))`, "ZgotmplZ"},
+               {`-exp\000052 ession(alert(1337))`, "ZgotmplZ"},
+               {`-expre\0000073sion`, "-expre\x073sion"},
+               {`@import url evil.css`, "ZgotmplZ"},
+       }
+       for _, test := range tests {
+               got := cssValueFilter(test.css)
+               if got != test.want {
+                       t.Errorf("%q: want %q but got %q", test.css, test.want, got)
+               }
+       }
+}
+
+func BenchmarkCSSEscaper(b *testing.B) {
+       for i := 0; i < b.N; i++ {
+               cssEscaper("The <i>quick</i>,\r\n<span style='color:brown'>brown</span> fox jumps\u2028over the <canine class=\"lazy\">dog</canine>")
+       }
+}
+
+func BenchmarkCSSEscaperNoSpecials(b *testing.B) {
+       for i := 0; i < b.N; i++ {
+               cssEscaper("The quick, brown fox jumps over the lazy dog.")
+       }
+}
+
+func BenchmarkDecodeCSS(b *testing.B) {
+       s := []byte(`The \3c i\3equick\3c/i\3e,\d\A\3cspan style=\27 color:brown\27\3e brown\3c/span\3e fox jumps\2028over the \3c canine class=\22lazy\22 \3edog\3c/canine\3e`)
+       b.ResetTimer()
+       for i := 0; i < b.N; i++ {
+               decodeCSS(s)
+       }
+}
+
+func BenchmarkDecodeCSSNoSpecials(b *testing.B) {
+       s := []byte("The quick, brown fox jumps over the lazy dog.")
+       b.ResetTimer()
+       for i := 0; i < b.N; i++ {
+               decodeCSS(s)
+       }
+}
+
+func BenchmarkCSSValueFilter(b *testing.B) {
+       for i := 0; i < b.N; i++ {
+               cssValueFilter(`  e\78preS\0Sio/**/n(alert(1337))`)
+       }
+}
+
+func BenchmarkCSSValueFilterOk(b *testing.B) {
+       for i := 0; i < b.N; i++ {
+               cssValueFilter(`Times New Roman`)
+       }
+}
diff --git a/src/pkg/exp/template/html/escape.go b/src/pkg/exp/template/html/escape.go

index 0eb8dfec8d523fea282cdfa66c708bdec459c4d6..929444eca0c70482979443bc139b47d7eebe6af5 100644 (file)
--- a/src/pkg/exp/template/html/escape.go
+++ b/src/pkg/exp/template/html/escape.go
@@ -33,12 +33,24 @@ func Escape(t *template.Template) (*template.Template, os.Error) {
  
  // funcMap maps command names to functions that render their inputs safe.
  var funcMap = template.FuncMap{
-       "exp_template_html_urlfilter":       urlFilter,
-       "exp_template_html_jsvalescaper":    jsValEscaper,
-       "exp_template_html_jsstrescaper":    jsStrEscaper,
+       "exp_template_html_cssescaper":      cssEscaper,
+       "exp_template_html_cssvaluefilter":  cssValueFilter,
         "exp_template_html_jsregexpescaper": jsRegexpEscaper,
+       "exp_template_html_jsstrescaper":    jsStrEscaper,
+       "exp_template_html_jsvalescaper":    jsValEscaper,
+       "exp_template_html_nospaceescaper":  htmlNospaceEscaper,
+       "exp_template_html_urlescaper":      urlEscaper,
+       "exp_template_html_urlfilter":       urlFilter,
+       "exp_template_html_urlnormalizer":   urlNormalizer,
  }
  
+// filterFailsafe is an innocuous word that is emitted in place of unsafe values
+// by sanitizer functions.  It is not a keyword in any programming language,
+// contains no special characters, is not empty, and when it appears in output
+// it is distinct enough that a developer can find the source of the problem
+// via a search engine.
+const filterFailsafe = "ZgotmplZ"
+
  // escape escapes a template node.
  func escape(c context, n parse.Node) context {
         switch n := n.(type) {
@@ -61,16 +73,22 @@ func escape(c context, n parse.Node) context {
  
  // escapeAction escapes an action template node.
  func escapeAction(c context, n *parse.ActionNode) context {
-       s := make([]string, 0, 2)
+       s := make([]string, 0, 3)
         switch c.state {
-       case stateURL:
+       case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL:
                 switch c.urlPart {
                 case urlPartNone:
                         s = append(s, "exp_template_html_urlfilter")
-               case urlPartQueryOrFrag:
-                       s = append(s, "urlquery")
+                       fallthrough
                 case urlPartPreQuery:
-                       s = append(s, "html")
+                       switch c.state {
+                       case stateCSSDqStr, stateCSSSqStr:
+                               s = append(s, "exp_template_html_cssescaper")
+                       case stateCSSDqURL, stateCSSSqURL, stateCSSURL:
+                               s = append(s, "exp_template_html_urlnormalizer")
+                       }
+               case urlPartQueryOrFrag:
+                       s = append(s, "exp_template_html_urlescaper")
                 case urlPartUnknown:
                         return context{
                                 state:   stateError,
@@ -82,19 +100,26 @@ func escapeAction(c context, n *parse.ActionNode) context {
                 }
         case stateJS:
                 s = append(s, "exp_template_html_jsvalescaper")
-               if c.delim != delimNone {
-                       s = append(s, "html")
-               }
         case stateJSDqStr, stateJSSqStr:
                 s = append(s, "exp_template_html_jsstrescaper")
         case stateJSRegexp:
                 s = append(s, "exp_template_html_jsregexpescaper")
-       case stateJSBlockCmt, stateJSLineCmt:
+       case stateJSBlockCmt, stateJSLineCmt, stateCSSBlockCmt, stateCSSLineCmt:
                 return context{
                         state:   stateError,
                         errLine: n.Line,
                         errStr:  fmt.Sprintf("%s appears inside a comment", n),
                 }
+       case stateCSS:
+               s = append(s, "exp_template_html_cssvaluefilter")
+       case stateText:
+               s = append(s, "html")
+       }
+       switch c.delim {
+       case delimNone:
+               // No extra-escaping needed for raw text content.
+       case delimSpaceOrTagEnd:
+               s = append(s, "exp_template_html_nospaceescaper")
         default:
                 s = append(s, "html")
         }
@@ -280,17 +305,25 @@ func escapeText(c context, s []byte) context {
  // A transition function takes a context and template text input, and returns
  // the updated context and any unconsumed text.
  var transitionFunc = [...]func(context, []byte) (context, []byte){
-       stateText:       tText,
-       stateTag:        tTag,
-       stateURL:        tURL,
-       stateJS:         tJS,
-       stateJSDqStr:    tJSStr,
-       stateJSSqStr:    tJSStr,
-       stateJSRegexp:   tJSRegexp,
-       stateJSBlockCmt: tJSBlockCmt,
-       stateJSLineCmt:  tJSLineCmt,
-       stateAttr:       tAttr,
-       stateError:      tError,
+       stateText:        tText,
+       stateTag:         tTag,
+       stateAttr:        tAttr,
+       stateURL:         tURL,
+       stateJS:          tJS,
+       stateJSDqStr:     tJSStr,
+       stateJSSqStr:     tJSStr,
+       stateJSRegexp:    tJSRegexp,
+       stateJSBlockCmt:  tBlockCmt,
+       stateJSLineCmt:   tLineCmt,
+       stateCSS:         tCSS,
+       stateCSSDqStr:    tCSSStr,
+       stateCSSSqStr:    tCSSStr,
+       stateCSSDqURL:    tCSSStr,
+       stateCSSSqURL:    tCSSStr,
+       stateCSSURL:      tCSSStr,
+       stateCSSBlockCmt: tBlockCmt,
+       stateCSSLineCmt:  tLineCmt,
+       stateError:       tError,
  }
  
  // tText is the context transition function for the text state.
@@ -337,6 +370,8 @@ func tTag(c context, s []byte) (context, []byte) {
                 state = stateURL
         } else if strings.HasPrefix(canonAttrName, "on") {
                 state = stateJS
+       } else if canonAttrName == "style" {
+               state = stateCSS
         }
  
         // Look for the start of the value.
@@ -376,7 +411,7 @@ func tAttr(c context, s []byte) (context, []byte) {
  func tURL(c context, s []byte) (context, []byte) {
         if bytes.IndexAny(s, "#?") >= 0 {
                 c.urlPart = urlPartQueryOrFrag
-       } else if c.urlPart == urlPartNone {
+       } else if len(s) != 0 && c.urlPart == urlPartNone {
                 c.urlPart = urlPartPreQuery
         }
         return c, nil
@@ -499,29 +534,50 @@ func tJSRegexp(c context, s []byte) (context, []byte) {
  
  var blockCommentEnd = []byte("*/")
  
-// tJSBlockCmt is the context transition function for the JS /*comment*/ state.
-func tJSBlockCmt(c context, s []byte) (context, []byte) {
-       // TODO: delegate to tSpecialTagEnd to find any </script> once that CL
-       // has been merged.
-
+// tBlockCmt is the context transition function for /*comment*/ states.
+func tBlockCmt(c context, s []byte) (context, []byte) {
+       // TODO: look for </script or </style end tags.
         i := bytes.Index(s, blockCommentEnd)
         if i == -1 {
                 return c, nil
         }
-       c.state = stateJS
+       switch c.state {
+       case stateJSBlockCmt:
+               c.state = stateJS
+       case stateCSSBlockCmt:
+               c.state = stateCSS
+       default:
+               panic(c.state.String())
+       }
         return c, s[i+2:]
  }
  
-// tJSLineCmt is the context transition function for the JS //comment state.
-func tJSLineCmt(c context, s []byte) (context, []byte) {
-       // TODO: delegate to tSpecialTagEnd to find any </script> once that CL
-       // has been merged.
+// tLineCmt is the context transition function for //comment states.
+func tLineCmt(c context, s []byte) (context, []byte) {
+       // TODO: look for </script or </style end tags.
+       var lineTerminators string
+       var endState state
+       switch c.state {
+       case stateJSLineCmt:
+               lineTerminators, endState = "\n\r\u2028\u2029", stateJS
+       case stateCSSLineCmt:
+               lineTerminators, endState = "\n\f\r", stateCSS
+               // Line comments are not part of any published CSS standard but
+               // are supported by the 4 major browsers.
+               // This defines line comments as
+               //     LINECOMMENT ::= "//" [^\n\f\r]*
+               // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
+               // newlines:
+               //     nl ::= #xA | #xD #xA | #xD | #xC
+       default:
+               panic(c.state.String())
+       }
  
-       i := bytes.IndexAny(s, "\r\n\u2028\u2029")
+       i := bytes.IndexAny(s, lineTerminators)
         if i == -1 {
                 return c, nil
         }
-       c.state = stateJS
+       c.state = endState
         // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
         // "However, the LineTerminator at the end of the line is not
         // considered to be part of the single-line comment; it is recognised
@@ -530,6 +586,124 @@ func tJSLineCmt(c context, s []byte) (context, []byte) {
         return c, s[i:]
  }
  
+// tCSS is the context transition function for the CSS state.
+// It scans s for the first construct that leaves plain CSS -- a quoted
+// string, a url(...) token, or a comment -- and returns the context for that
+// construct together with the text following its opening delimiter.
+// When s contains no such construct, c is returned unchanged with a nil
+// remainder.
+func tCSS(c context, s []byte) (context, []byte) {
+       // TODO: look for </style
+
+       // Quoted strings are rare in CSS.  They show up as:
+       // (1) URLs as in background: "/foo.png"
+       // (2) Multiword font-names as in font-family: "Times New Roman"
+       // (3) List separators in content values as in inline-lists:
+       //    <style>
+       //    ul.inlineList { list-style: none; padding:0 }
+       //    ul.inlineList > li { display: inline }
+       //    ul.inlineList > li:before { content: ", " }
+       //    ul.inlineList > li:first-child:before { content: "" }
+       //    </style>
+       //    <ul class=inlineList><li>One<li>Two<li>Three</ul>
+       // (4) Attribute value selectors as in a[href="http://example.com/"]
+       //
+       // Every string is conservatively handled as if it were a URL, with
+       // a few allowances so that the other uses are not mangled.
+       //
+       // For (1) the conservative assumption is simply correct.
+       // For (2), valid font names do not contain ':', '?', or '#', so we
+       // never transition past urlPartPreQuery and the assumption is
+       // harmless.
+       // For (3), the protocol heuristic should not be tripped, and there
+       // should not be non-space content after a '?' or '#', so we are ok
+       // as long as only RFC 3986 reserved characters are %-encoded.
+       // For (4), URL attributes should be URL escaped anyway, and for
+       // other attributes the attribute name is available should the
+       // conservative assumption prove problematic for real code.
+
+       const cssSpace = "\t\n\f\r "
+       for {
+               i := bytes.IndexAny(s, `("'/`)
+               if i == -1 {
+                       return c, nil
+               }
+               rest := s[i+1:]
+               switch s[i] {
+               case '"':
+                       c.state = stateCSSDqStr
+                       return c, rest
+               case '\'':
+                       c.state = stateCSSSqStr
+                       return c, rest
+               case '/':
+                       // Only "//" and "/*" open a comment; a lone slash
+                       // is skipped like any other byte.
+                       if len(rest) != 0 {
+                               if rest[0] == '/' {
+                                       c.state = stateCSSLineCmt
+                                       return c, rest[1:]
+                               }
+                               if rest[0] == '*' {
+                                       c.state = stateCSSBlockCmt
+                                       return c, rest[1:]
+                               }
+                       }
+               case '(':
+                       // A parenthesis matters only when the keyword to
+                       // its left is url.
+                       if endsWithCSSKeyword(bytes.TrimRight(s[:i], cssSpace), "url") {
+                               arg := bytes.TrimLeft(rest, cssSpace)
+                               if len(arg) != 0 && arg[0] == '"' {
+                                       c.state = stateCSSDqURL
+                                       return c, arg[1:]
+                               }
+                               if len(arg) != 0 && arg[0] == '\'' {
+                                       c.state = stateCSSSqURL
+                                       return c, arg[1:]
+                               }
+                               c.state = stateCSSURL
+                               return c, arg
+                       }
+               }
+               s = rest
+       }
+       panic("unreachable")
+}
+
+// tCSSStr is the context transition function for the CSS string and URL states.
+// It walks s up to the byte that terminates the current string or URL,
+// passing the CSS-decoded text it steps over to tURL so that the URL part of
+// the context is tracked.  On reaching the terminator it returns the context
+// with state stateCSS and the text after the terminator.  If no terminator is
+// found it returns whatever tURL returns for the decoded remainder.  A
+// backslash as the very last byte yields a stateError context.
+// It panics if c.state is not one of the CSS string or URL states.
+func tCSSStr(c context, s []byte) (context, []byte) {
+       // TODO: look for </style
+
+       // endAndEsc lists the bytes of interest for the current state: the
+       // escape character '\\' plus every byte that ends the string or URL.
+       var endAndEsc string
+       switch c.state {
+       case stateCSSDqStr, stateCSSDqURL:
+               endAndEsc = `\"`
+       case stateCSSSqStr, stateCSSSqURL:
+               endAndEsc = `\'`
+       case stateCSSURL:
+               // Unquoted URLs end with a newline or close parenthesis.
+               // The below includes the wc (whitespace character) and nl.
+               endAndEsc = "\\\t\n\f\r )"
+       default:
+               panic(c.state.String())
+       }
+
+       // b is the not yet examined tail of s; s itself is kept intact for
+       // the error message below.
+       b := s
+       for {
+               i := bytes.IndexAny(b, endAndEsc)
+               if i == -1 {
+                       // No terminator or escape left in this chunk.
+                       return tURL(c, decodeCSS(b))
+               }
+               if b[i] == '\\' {
+                       // Step over the byte after the backslash so that an
+                       // escaped delimiter is not taken for the terminator.
+                       i++
+                       if i == len(b) {
+                               return context{
+                                       state:  stateError,
+                                       errStr: fmt.Sprintf("unfinished escape sequence in CSS string: %q", s),
+                               }, nil
+                       }
+               } else {
+                       // b[i] is a terminator: resume plain CSS after it.
+                       c.state = stateCSS
+                       return c, b[i+1:]
+               }
+               // Run everything up to and including the escaped byte through
+               // tURL, keeping only the resulting context, then continue
+               // scanning after it.
+               c, _ = tURL(c, decodeCSS(b[:i+1]))
+               b = b[i+1:]
+       }
+       panic("unreachable")
+}
+
  // tError is the context transition function for the error state.
  func tError(c context, s []byte) (context, []byte) {
         return c, nil
@@ -612,28 +786,3 @@ var urlAttr = map[string]bool{
         "src":        true,
         "usemap":     true,
  }
-
-// urlFilter returns the HTML equivalent of its input unless it contains an
-// unsafe protocol in which case it defangs the entire URL.
-func urlFilter(args ...interface{}) string {
-       ok := false
-       var s string
-       if len(args) == 1 {
-               s, ok = args[0].(string)
-       }
-       if !ok {
-               s = fmt.Sprint(args...)
-       }
-       i := strings.IndexRune(s, ':')
-       if i >= 0 && strings.IndexRune(s[:i], '/') < 0 {
-               protocol := strings.ToLower(s[:i])
-               if protocol != "http" && protocol != "https" && protocol != "mailto" {
-                       // Return a value that someone investigating a bug
-                       // report can put into a search engine.
-                       return "#ZgotmplZ"
-               }
-       }
-       // TODO: Once we handle <style>#id { background: url({{.Img}}) }</style>
-       // we will need to stop this from HTML escaping and pipeline sanitizers.
-       return template.HTMLEscapeString(s)
-}
diff --git a/src/pkg/exp/template/html/escape_test.go b/src/pkg/exp/template/html/escape_test.go

index 6f5ecf6ef3e84959189bd7f0d55a7eaa05a87b78..3294323409e3efe020f8ec451a65ae061faa09ba 100644 (file)
--- a/src/pkg/exp/template/html/escape_test.go
+++ b/src/pkg/exp/template/html/escape_test.go
@@ -82,14 +82,9 @@ func TestEscape(t *testing.T) {
                         "true",
                 },
                 {
-                       // TODO: Make sure the URL escaper escapes single quotes so it can
-                       // be embedded in single quoted URI attributes and CSS url(...)
-                       // constructs. Single quotes are reserved in URLs, but are only used
-                       // in the obsolete "mark" rule in an appendix in RFC 3986 so can be
-                       // safely encoded.
                         "constant",
                         `<a href="/search?q={{"'a<b'"}}">`,
-                       `<a href="/search?q='a%3Cb'">`,
+                       `<a href="/search?q=%27a%3cb%27">`,
                 },
                 {
                         "multipleAttrs",
@@ -121,6 +116,11 @@ func TestEscape(t *testing.T) {
                         `<a href='{{"javascript:alert(%22pwned%22)"}}'>`,
                         `<a href='#ZgotmplZ'>`,
                 },
+               {
+                       "nonHierURL",
+                       `<a href={{"mailto:Muhammed \"The Greatest\" Ali <m.ali@example.com>"}}>`,
+                       `<a href=mailto:Muhammed&#32;&#34;The&#32;Greatest&#34;&#32;Ali&#32;&lt;m.ali@example.com&gt;>`,
+               },
                 {
                         "urlPath",
                         `<a href='http://{{"javascript:80"}}/foo'>`,
@@ -129,12 +129,12 @@ func TestEscape(t *testing.T) {
                 {
                         "urlQuery",
                         `<a href='/search?q={{.H}}'>`,
-                       `<a href='/search?q=%3CHello%3E'>`,
+                       `<a href='/search?q=%3cHello%3e'>`,
                 },
                 {
                         "urlFragment",
                         `<a href='/faq#{{.H}}'>`,
-                       `<a href='/faq#%3CHello%3E'>`,
+                       `<a href='/faq#%3cHello%3e'>`,
                 },
                 {
                         "urlBranch",
@@ -144,7 +144,7 @@ func TestEscape(t *testing.T) {
                 {
                         "urlBranchConflictMoot",
                         `<a href="{{if .T}}/foo?a={{else}}/bar#{{end}}{{.C}}">`,
-                       `<a href="/foo?a=%3CCincinatti%3E">`,
+                       `<a href="/foo?a=%3cCincinatti%3e">`,
                 },
                 {
                         "jsStrValue",
@@ -192,6 +192,138 @@ func TestEscape(t *testing.T) {
                         "<button onclick='alert(&quot;{{.H}}&quot;)'>",
                         `<button onclick='alert(&quot;\x3cHello\x3e&quot;)'>`,
                 },
+               {
+                       "styleBidiKeywordPassed",
+                       `<p style="dir: {{"ltr"}}">`,
+                       `<p style="dir: ltr">`,
+               },
+               {
+                       "styleBidiPropNamePassed",
+                       `<p style="border-{{"left"}}: 0; border-{{"right"}}: 1in">`,
+                       `<p style="border-left: 0; border-right: 1in">`,
+               },
+               {
+                       "styleExpressionBlocked",
+                       `<p style="width: {{"expression(alert(1337))"}}">`,
+                       `<p style="width: ZgotmplZ">`,
+               },
+               {
+                       "styleTagSelectorPassed",
+                       `<style>{{"p"}} { color: pink }</style>`,
+                       `<style>p { color: pink }</style>`,
+               },
+               {
+                       "styleIDPassed",
+                       `<style>p{{"#my-ID"}} { font: Arial }`,
+                       `<style>p#my-ID { font: Arial }`,
+               },
+               {
+                       "styleClassPassed",
+                       `<style>p{{".my_class"}} { font: Arial }`,
+                       `<style>p.my_class { font: Arial }`,
+               },
+               {
+                       "styleQuantityPassed",
+                       `<a style="left: {{"2em"}}; top: {{0}}">`,
+                       `<a style="left: 2em; top: 0">`,
+               },
+               {
+                       "stylePctPassed",
+                       `<table style=width:{{"100%"}}>`,
+                       `<table style=width:100%>`,
+               },
+               {
+                       "styleColorPassed",
+                       `<p style="color: {{"#8ff"}}; background: {{"#000"}}">`,
+                       `<p style="color: #8ff; background: #000">`,
+               },
+               {
+                       "styleObfuscatedExpressionBlocked",
+                       `<p style="width: {{"  e\78preS\0Sio/**/n(alert(1337))"}}">`,
+                       `<p style="width: ZgotmplZ">`,
+               },
+               {
+                       "styleMozBindingBlocked",
+                       `<p style="{{"-moz-binding(alert(1337))"}}: ...">`,
+                       `<p style="ZgotmplZ: ...">`,
+               },
+               {
+                       "styleObfuscatedMozBindingBlocked",
+                       `<p style="{{"  -mo\7a-B\0I/**/nding(alert(1337))"}}: ...">`,
+                       `<p style="ZgotmplZ: ...">`,
+               },
+               {
+                       "styleFontNameString",
+                       `<p style='font-family: "{{"Times New Roman"}}"'>`,
+                       `<p style='font-family: "Times New Roman"'>`,
+               },
+               {
+                       "styleFontNameString",
+                       `<p style='font-family: "{{"Times New Roman"}}", "{{"sans-serif"}}"'>`,
+                       `<p style='font-family: "Times New Roman", "sans-serif"'>`,
+               },
+               {
+                       "styleFontNameUnquoted",
+                       `<p style='font-family: {{"Times New Roman"}}'>`,
+                       `<p style='font-family: Times New Roman'>`,
+               },
+               {
+                       "styleURLQueryEncoded",
+                       `<p style="background: url(/img?name={{"O'Reilly Animal(1)<2>.png"}})">`,
+                       `<p style="background: url(/img?name=O%27Reilly%20Animal%281%29%3c2%3e.png)">`,
+               },
+               {
+                       "styleQuotedURLQueryEncoded",
+                       `<p style="background: url('/img?name={{"O'Reilly Animal(1)<2>.png"}}')">`,
+                       `<p style="background: url('/img?name=O%27Reilly%20Animal%281%29%3c2%3e.png')">`,
+               },
+               {
+                       "styleStrQueryEncoded",
+                       `<p style="background: '/img?name={{"O'Reilly Animal(1)<2>.png"}}'">`,
+                       `<p style="background: '/img?name=O%27Reilly%20Animal%281%29%3c2%3e.png'">`,
+               },
+               {
+                       "styleURLBadProtocolBlocked",
+                       `<a style="background: url('{{"javascript:alert(1337)"}}')">`,
+                       `<a style="background: url('#ZgotmplZ')">`,
+               },
+               {
+                       "styleStrBadProtocolBlocked",
+                       `<a style="background: '{{"javascript:alert(1337)"}}'">`,
+                       `<a style="background: '#ZgotmplZ'">`,
+               },
+               {
+                       "styleURLGoodProtocolPassed",
+                       `<a style="background: url('{{"http://oreilly.com/O'Reilly Animals(1)<2>;{}.html"}}')">`,
+                       `<a style="background: url('http://oreilly.com/O%27Reilly%20Animals%281%29%3c2%3e;%7b%7d.html')">`,
+               },
+               {
+                       "styleStrGoodProtocolPassed",
+                       `<a style="background: '{{"http://oreilly.com/O'Reilly Animals(1)<2>;{}.html"}}'">`,
+                       `<a style="background: 'http\3a\2f\2foreilly.com\2fO\27Reilly Animals\28 1\29\3c 2\3e\3b\7b\7d.html'">`,
+               },
+               {
+                       "styleURLMixedCase",
+                       `<p style="background: URL(#{{.H}})">`,
+                       `<p style="background: URL(#%3cHello%3e)">`,
+               },
+               {
+                       "stylePropertyPairPassed",
+                       `<a style='{{"color: red"}}'>`,
+                       `<a style='color: red'>`,
+               },
+               {
+                       "styleStrSpecialsEncoded",
+                       `<a style="font-family: '{{"/**/'\";:// \\"}}', &quot;{{"/**/'\";:// \\"}}&quot;">`,
+                       `<a style="font-family: '\2f**\2f\27\22\3b\3a\2f\2f \\', &quot;\2f**\2f\27\22\3b\3a\2f\2f \\&quot;">`,
+               },
+               {
+                       "styleURLSpecialsEncoded",
+                       // TODO: Find out what IE does with url(/*foo*/bar)
+                       // FF, Chrome, and Safari seem to treat it as a URL.
+                       `<a style="border-image: url({{"/**/'\";:// \\"}}), url(&quot;{{"/**/'\";:// \\"}}&quot;), url('{{"/**/'\";:// \\"}}'), 'http://www.example.com/?q={{"/**/'\";:// \\"}}''">`,
+                       `<a style="border-image: url(/**/%27%22;://%20%5c), url(&quot;/**/%27%22;://%20%5c&quot;), url('/**/%27%22;://%20%5c'), 'http://www.example.com/?q=%2f%2a%2a%2f%27%22%3b%3a%2f%2f%20%5c''">`,
+               },
         }
  
         for _, test := range tests {
@@ -299,11 +431,19 @@ func TestErrors(t *testing.T) {
                         `unfinished JS regexp charset: "foo[\\]/"`,
                 },
                 {
-                       `<a onclick="/* alert({{.X}} */">`,
+                       `<a onclick="/* alert({{.X}}) */">`,
                         `z:1: (action: [(command: [F=[X]])]) appears inside a comment`,
                 },
                 {
-                       `<a onclick="// alert({{.X}}">`,
+                       `<a onclick="// alert({{.X}})">`,
+                       `z:1: (action: [(command: [F=[X]])]) appears inside a comment`,
+               },
+               {
+                       `<a style="/* color: {{.X}} */">`,
+                       `z:1: (action: [(command: [F=[X]])]) appears inside a comment`,
+               },
+               {
+                       `<a style="// color: {{.X}}">`,
                         `z:1: (action: [(command: [F=[X]])]) appears inside a comment`,
                 },
         }
@@ -533,6 +673,98 @@ func TestEscapeText(t *testing.T) {
                         `<a onclick="/foo\/`,
                         context{state: stateJSRegexp, delim: delimDoubleQuote},
                 },
+               {
+                       `<a onclick="/foo/`,
+                       context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp},
+               },
+               {
+                       `<input checked style="`,
+                       context{state: stateCSS, delim: delimDoubleQuote},
+               },
+               {
+                       `<a style="//`,
+                       context{state: stateCSSLineCmt, delim: delimDoubleQuote},
+               },
+               {
+                       `<a style="//</script>`,
+                       context{state: stateCSSLineCmt, delim: delimDoubleQuote},
+               },
+               {
+                       "<a style='//\n",
+                       context{state: stateCSS, delim: delimSingleQuote},
+               },
+               {
+                       "<a style='//\r",
+                       context{state: stateCSS, delim: delimSingleQuote},
+               },
+               {
+                       `<a style="/*`,
+                       context{state: stateCSSBlockCmt, delim: delimDoubleQuote},
+               },
+               {
+                       `<a style="/*/`,
+                       context{state: stateCSSBlockCmt, delim: delimDoubleQuote},
+               },
+               {
+                       `<a style="/**/`,
+                       context{state: stateCSS, delim: delimDoubleQuote},
+               },
+               {
+                       `<a style="background: '`,
+                       context{state: stateCSSSqStr, delim: delimDoubleQuote},
+               },
+               {
+                       `<a style="background: &quot;`,
+                       context{state: stateCSSDqStr, delim: delimDoubleQuote},
+               },
+               {
+                       `<a style="background: '/foo?img=`,
+                       context{state: stateCSSSqStr, delim: delimDoubleQuote, urlPart: urlPartQueryOrFrag},
+               },
+               {
+                       `<a style="background: '/`,
+                       context{state: stateCSSSqStr, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+               },
+               {
+                       `<a style="background: url(&#x22;/`,
+                       context{state: stateCSSDqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+               },
+               {
+                       `<a style="background: url('/`,
+                       context{state: stateCSSSqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+               },
+               {
+                       `<a style="background: url('/)`,
+                       context{state: stateCSSSqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+               },
+               {
+                       `<a style="background: url('/ `,
+                       context{state: stateCSSSqURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+               },
+               {
+                       `<a style="background: url(/`,
+                       context{state: stateCSSURL, delim: delimDoubleQuote, urlPart: urlPartPreQuery},
+               },
+               {
+                       `<a style="background: url( `,
+                       context{state: stateCSSURL, delim: delimDoubleQuote},
+               },
+               {
+                       `<a style="background: url( /image?name=`,
+                       context{state: stateCSSURL, delim: delimDoubleQuote, urlPart: urlPartQueryOrFrag},
+               },
+               {
+                       `<a style="background: url(x)`,
+                       context{state: stateCSS, delim: delimDoubleQuote},
+               },
+               {
+                       `<a style="background: url('x'`,
+                       context{state: stateCSS, delim: delimDoubleQuote},
+               },
+               {
+                       `<a style="background: url( x `,
+                       context{state: stateCSS, delim: delimDoubleQuote},
+               },
         }
  
         for _, test := range tests {
diff --git a/src/pkg/exp/template/html/html.go b/src/pkg/exp/template/html/html.go

new file mode 100644 (file)

index 0000000..0523322
--- /dev/null
+++ b/src/pkg/exp/template/html/html.go
@@ -0,0 +1,101 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "bytes"
+       "fmt"
+       "utf8"
+)
+
+// htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
+// It stringifies args and replaces NUL with U+FFFD, the HTML specials and
+// the whitespace and quoting characters listed below with character
+// references, and the code point ranges U+FDD0-U+FDEF and U+FFF0-U+FFFF with
+// hexadecimal character references.  If nothing needs replacing the
+// stringified input is returned as is.
+func htmlNospaceEscaper(args ...interface{}) string {
+       s := stringify(args...)
+       // The set of runes escaped is the union of the HTML specials and
+       // those determined by running the JS below in browsers:
+
+       // <div id=d></div>
+       // <script>(function () {
+       // var a = [], d = document.getElementById("d"), i, c, s;
+       // for (i = 0; i < 0x10000; ++i) {
+       //   c = String.fromCharCode(i);
+       //   d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
+       //   s = d.getElementsByTagName("SPAN")[0];
+       //   if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
+       // }
+       // document.write(a.join(", "));
+       // })()</script>
+
+       var b bytes.Buffer
+       // written is the index in s of the first byte not yet copied to b.
+       written := 0
+       for i, r := range s {
+               var repl string
+               switch r {
+               case 0:
+                       // http://www.w3.org/TR/html5/tokenization.html#attribute-value-unquoted-state: "
+                       // U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
+                       // CHARACTER character to the current attribute's value.
+                       // "
+                       // and similarly
+                       // http://www.w3.org/TR/html5/tokenization.html#before-attribute-value-state
+                       repl = "\uFFFD"
+               case '\t':
+                       repl = "&#9;"
+               case '\n':
+                       repl = "&#10;"
+               case '\v':
+                       repl = "&#11;"
+               case '\f':
+                       repl = "&#12;"
+               case '\r':
+                       repl = "&#13;"
+               case ' ':
+                       repl = "&#32;"
+               case '"':
+                       repl = "&#34;"
+               case '&':
+                       repl = "&amp;"
+               case '\'':
+                       repl = "&#39;"
+               case '+':
+                       repl = "&#43;"
+               case '<':
+                       repl = "&lt;"
+               case '=':
+                       repl = "&#61;"
+               case '>':
+                       repl = "&gt;"
+               case '`':
+                       // A parse error in the attribute value (unquoted) and
+                       // before attribute value states.
+                       // Treated as a quoting character by IE.
+                       repl = "&#96;"
+               default:
+                       // IE does not allow the ranges below raw in attributes.
+                       if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
+                               b.WriteString(s[written:i])
+                               // Emit exactly one hexadecimal character
+                               // reference for the rune.
+                               fmt.Fprintf(&b, "&#x%x;", r)
+                               // Advance by the number of bytes actually
+                               // consumed.  An invalid UTF-8 byte decodes to
+                               // U+FFFD but is only one byte wide, so using
+                               // utf8.RuneLen(r) here would skip input.
+                               _, width := utf8.DecodeRuneInString(s[i:])
+                               written = i + width
+                       }
+                       continue
+               }
+               b.WriteString(s[written:i])
+               b.WriteString(repl)
+               // Valid as long as we don't include any cases above in the
+               // 0x80-0xff range.
+               written = i + utf8.RuneLen(r)
+       }
+       // written only moves past zero when something was replaced.
+       if written == 0 {
+               return s
+       }
+       b.WriteString(s[written:])
+       return b.String()
+}
diff --git a/src/pkg/exp/template/html/html_test.go b/src/pkg/exp/template/html/html_test.go

new file mode 100644 (file)

index 0000000..2b118c5
--- /dev/null
+++ b/src/pkg/exp/template/html/html_test.go
@@ -0,0 +1,57 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "html"
+       "strings"
+       "testing"
+)
+
+// TestHTMLNospaceEscaper runs every ASCII character and a handful of
+// non-ASCII runes through htmlNospaceEscaper, compares the result with the
+// expected encoding, and then checks that unescaping that result gives back
+// the input with its NUL replaced by U+FFFD.
+func TestHTMLNospaceEscaper(t *testing.T) {
+       raw := ("\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f" +
+               "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+               ` !"#$%&'()*+,-./` +
+               `0123456789:;<=>?` +
+               `@ABCDEFGHIJKLMNO` +
+               `PQRSTUVWXYZ[\]^_` +
+               "`abcdefghijklmno" +
+               "pqrstuvwxyz{|}~\x7f" +
+               "\u00A0\u0100\u2028\u2029\ufeff\U0001D11E")
+
+       wantEscaped := ("\ufffd\x01\x02\x03\x04\x05\x06\x07" +
+               "\x08&#9;&#10;&#11;&#12;&#13;\x0E\x0F" +
+               "\x10\x11\x12\x13\x14\x15\x16\x17" +
+               "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+               `&#32;!&#34;#$%&amp;&#39;()*&#43;,-./` +
+               `0123456789:;&lt;&#61;&gt;?` +
+               `@ABCDEFGHIJKLMNO` +
+               `PQRSTUVWXYZ[\]^_` +
+               `&#96;abcdefghijklmno` +
+               `pqrstuvwxyz{|}~` + "\u007f" +
+               "\u00A0\u0100\u2028\u2029\ufeff\U0001D11E")
+
+       // Encoding must match the expected text exactly.
+       escaped := htmlNospaceEscaper(raw)
+       if escaped != wantEscaped {
+               t.Errorf("encode: want\n\t%q\nbut got\n\t%q", wantEscaped, escaped)
+       }
+
+       // Decoding must round-trip, except that NUL comes back as U+FFFD.
+       decoded := html.UnescapeString(escaped)
+       wantDecoded := strings.Replace(raw, "\x00", "\ufffd", 1)
+       if wantDecoded != decoded {
+               t.Errorf("decode: want\n\t%q\nbut got\n\t%q", wantDecoded, decoded)
+       }
+}
+
+// BenchmarkHTMLNospaceEscaper times htmlNospaceEscaper on an input that
+// contains characters it has to replace.
+func BenchmarkHTMLNospaceEscaper(b *testing.B) {
+       const markup = "The <i>quick</i>,\r\n<span style='color:brown'>brown</span> fox jumps\u2028over the <canine class=\"lazy\">dog</canine>"
+       for n := 0; n < b.N; n++ {
+               htmlNospaceEscaper(markup)
+       }
+}
+
+// BenchmarkHTMLNospaceEscaperNoSpecials times htmlNospaceEscaper on an input
+// in which no character needs replacing.
+func BenchmarkHTMLNospaceEscaperNoSpecials(b *testing.B) {
+       const plain = "The_quick,_brown_fox_jumps_over_the_lazy_dog."
+       for n := 0; n < b.N; n++ {
+               htmlNospaceEscaper(plain)
+       }
+}
diff --git a/src/pkg/exp/template/html/js.go b/src/pkg/exp/template/html/js.go

index 65479bc13e254336e2313df73f097ed7a423e9af..41476519036bd95340427c2238fe0b29740280a4 100644 (file)
--- a/src/pkg/exp/template/html/js.go
+++ b/src/pkg/exp/template/html/js.go
@@ -269,7 +269,7 @@ var jsRegexpReplacementTable = []string{
         '}':  `\}`,
  }
  
-// isJSIdentPart is true if the given rune is a JS identifier part.
+// isJSIdentPart returns whether the given rune is a JS identifier part.
  // It does not handle all the non-Latin letters, joiners, and combining marks,
  // but it does handle every codepoint that can occur in a numeric literal or
  // a keyword.
diff --git a/src/pkg/exp/template/html/url.go b/src/pkg/exp/template/html/url.go

new file mode 100644 (file)

index 0000000..768fedb
--- /dev/null
+++ b/src/pkg/exp/template/html/url.go
@@ -0,0 +1,104 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "bytes"
+       "fmt"
+       "strings"
+)
+
+// urlFilter returns its stringified input unchanged unless the input appears
+// to start with a protocol other than http, https, or mailto, in which case
+// it defangs the entire URL by returning "#" followed by filterFailsafe.
+// The protocol is the text before the first ':' provided that text contains
+// no '/'; it is compared case-insensitively.
+// The result is not HTML escaped by this function.
+func urlFilter(args ...interface{}) string {
+       s := stringify(args...)
+       i := strings.IndexRune(s, ':')
+       if i >= 0 && strings.IndexRune(s[:i], '/') < 0 {
+               protocol := strings.ToLower(s[:i])
+               if protocol != "http" && protocol != "https" && protocol != "mailto" {
+                       // Return a value that someone investigating a bug
+                       // report can put into a search engine.
+                       return "#" + filterFailsafe
+               }
+       }
+       // NOTE(review): the previous version of this function HTML escaped s
+       // here and carried a TODO to stop doing so once CSS url(...) values
+       // were handled.  s is now returned raw -- presumably escaping is left
+       // to a later sanitizer in the pipeline; confirm against the caller.
+       return s
+}
+
+// urlEscaper produces an output that can be embedded in a URL query.
+// The output can be embedded in an HTML attribute without further escaping.
+// It calls urlProcessor with norm set to false, so RFC 3986 reserved
+// characters are percent-encoded along with everything else that is not
+// unreserved.
+func urlEscaper(args ...interface{}) string {
+       return urlProcessor(false, args...)
+}
+
+// urlNormalizer normalizes URL content so it can be embedded in a
+// quote-delimited string or parenthesis delimited url(...).
+// The normalizer does not encode all HTML specials.  Specifically, it does not
+// encode '&' so correct embedding in an HTML attribute requires escaping of
+// '&' to '&amp;'.
+// It calls urlProcessor with norm set to true.
+func urlNormalizer(args ...interface{}) string {
+       return urlProcessor(true, args...)
+}
+
+// urlProcessor normalizes (when norm is true) or escapes its input to produce
+// a valid hierarchical or opaque URL part.
+// It stringifies args and percent-encodes, byte by byte, everything that is
+// not left alone by the switch below.  When norm is true the RFC 3986
+// reserved characters listed in the first case and any '%' that already
+// starts a valid two-digit hex escape are also left alone.
+// If no byte needed encoding the stringified input is returned as is.
+func urlProcessor(norm bool, args ...interface{}) string {
+       s := stringify(args...)
+       var b bytes.Buffer
+       // written is the index in s of the first byte not yet copied to b.
+       written := 0
+       // The byte loop below assumes that all URLs use UTF-8 as the
+       // content-encoding. This is similar to the URI to IRI encoding scheme
+       // defined in section 3.1 of  RFC 3987, and behaves the same as the
+       // EcmaScript builtin encodeURIComponent.
+       // It should not cause any misencoding of URLs in pages with
+       // Content-type: text/html;charset=UTF-8.
+       for i, n := 0, len(s); i < n; i++ {
+               c := s[i]
+               // A continue below leaves the byte as is; leaving the switch
+               // any other way percent-encodes it.
+               switch c {
+               // RFC 3986 reserved characters: kept when normalizing,
+               // encoded when escaping.
+               // Single quote and parens are sub-delims in RFC 3986 as well,
+               // but they are deliberately absent from this list so that
+               // they are always escaped and the output can be embedded in
+               // single quoted attributes and unquoted CSS url(...)
+               // constructs.
+               // Single quotes are reserved in URLs, but are only used in
+               // the obsolete "mark" rule in an appendix in RFC 3986
+               // so can be safely encoded.
+               case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
+                       if norm {
+                               continue
+                       }
+               // Unreserved according to RFC 3986 sec 2.3
+               // "For consistency, percent-encoded octets in the ranges of
+               // ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
+               // period (%2E), underscore (%5F), or tilde (%7E) should not be
+               // created by URI producers"
+               case '-', '.', '_', '~':
+                       continue
+               case '%':
+                       // When normalizing do not re-encode valid escapes.
+                       if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) {
+                               continue
+                       }
+               default:
+                       // Unreserved according to RFC 3986 sec 2.3
+                       if 'a' <= c && c <= 'z' {
+                               continue
+                       }
+                       if 'A' <= c && c <= 'Z' {
+                               continue
+                       }
+                       if '0' <= c && c <= '9' {
+                               continue
+                       }
+               }
+               // Copy the untouched run before this byte, then write the
+               // byte as '%' followed by two lowercase hex digits.
+               b.WriteString(s[written:i])
+               fmt.Fprintf(&b, "%%%02x", c)
+               written = i + 1
+       }
+       // written only moves past zero when a byte was encoded.
+       if written == 0 {
+               return s
+       }
+       b.WriteString(s[written:])
+       return b.String()
+}
diff --git a/src/pkg/exp/template/html/url_test.go b/src/pkg/exp/template/html/url_test.go

new file mode 100644 (file)

index 0000000..b846231
--- /dev/null
+++ b/src/pkg/exp/template/html/url_test.go
@@ -0,0 +1,112 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "testing"
+)
+
+func TestURLNormalizer(t *testing.T) { // Table-driven test: urlNormalizer(url) must equal want, and want must be a fixed point.
+       tests := []struct {
+               url, want string // input and the expected normalized output
+       }{
+               {"", ""}, // empty input stays empty
+               {
+                       "http://example.com:80/foo/bar?q=foo%20&bar=x+y#frag", // reserved characters and a valid %20 escape ...
+                       "http://example.com:80/foo/bar?q=foo%20&bar=x+y#frag", // ... are expected back unchanged
+               },
+               {" ", "%20"}, // a space is percent-encoded
+               {"%7c", "%7c"}, // a valid lower-case hex escape is kept as is
+               {"%7C", "%7C"}, // a valid upper-case hex escape is kept as is; its case is not altered
+               {"%2", "%252"}, // '%' followed by only one hex digit is itself encoded as %25
+               {"%", "%25"}, // a lone '%' is encoded
+               {"%z", "%25z"}, // '%' followed by a non-hex character is encoded
+               {"/foo|bar/%5c\u1234", "/foo%7cbar/%5c%e1%88%b4"}, // '|' and the UTF-8 bytes of U+1234 are encoded; the existing %5c is kept
+       }
+       for _, test := range tests {
+               if got := urlNormalizer(test.url); test.want != got {
+                       t.Errorf("%q: want\n\t%q\nbut got\n\t%q", test.url, test.want, got)
+               }
+               if test.want != urlNormalizer(test.want) { // normalizing already-normalized output must change nothing
+                       t.Errorf("not idempotent: %q", test.want)
+               }
+       }
+}
+
+func TestURLFilters(t *testing.T) { // Feeds one broad input to urlEscaper and urlNormalizer and compares each result with an exact expected string.
+       input := ("\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f" + // bytes 0x00-0x0f
+               "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + // bytes 0x10-0x1f
+               ` !"#$%&'()*+,-./` + // printable ASCII starts here, 16 characters per line
+               `0123456789:;<=>?` +
+               `@ABCDEFGHIJKLMNO` +
+               `PQRSTUVWXYZ[\]^_` +
+               "`abcdefghijklmno" +
+               "pqrstuvwxyz{|}~\x7f" + // ends with DEL (0x7f)
+               "\u00A0\u0100\u2028\u2029\ufeff\U0001D11E") // non-ASCII code points, including one above U+FFFF
+
+       tests := []struct {
+               name    string // label used in the failure message
+               escaper func(...interface{}) string // function under test
+               escaped string // exact output expected for input
+       }{
+               {
+                       "urlEscaper", // expectation: everything except ASCII letters, digits and -._~ is percent-encoded
+                       urlEscaper,
+                       "%00%01%02%03%04%05%06%07%08%09%0a%0b%0c%0d%0e%0f" +
+                               "%10%11%12%13%14%15%16%17%18%19%1a%1b%1c%1d%1e%1f" +
+                               "%20%21%22%23%24%25%26%27%28%29%2a%2b%2c-.%2f" +
+                               "0123456789%3a%3b%3c%3d%3e%3f" +
+                               "%40ABCDEFGHIJKLMNO" +
+                               "PQRSTUVWXYZ%5b%5c%5d%5e_" +
+                               "%60abcdefghijklmno" +
+                               "pqrstuvwxyz%7b%7c%7d~%7f" +
+                               "%c2%a0%c4%80%e2%80%a8%e2%80%a9%ef%bb%bf%f0%9d%84%9e", // each non-ASCII code point is encoded byte by byte
+               },
+               {
+                       "urlNormalizer", // expectation: as above, but the reserved characters !#$&*+,/:;=?@[] also pass through
+                       urlNormalizer,
+                       "%00%01%02%03%04%05%06%07%08%09%0a%0b%0c%0d%0e%0f" +
+                               "%10%11%12%13%14%15%16%17%18%19%1a%1b%1c%1d%1e%1f" +
+                               "%20!%22#$%25&%27%28%29*+,-./" + // '%' becomes %25: in input it is followed by "&'", not by two hex digits
+                               "0123456789:;%3c=%3e?" +
+                               "@ABCDEFGHIJKLMNO" +
+                               "PQRSTUVWXYZ[%5c]%5e_" +
+                               "%60abcdefghijklmno" +
+                               "pqrstuvwxyz%7b%7c%7d~%7f" +
+                               "%c2%a0%c4%80%e2%80%a8%e2%80%a9%ef%bb%bf%f0%9d%84%9e",
+               },
+       }
+
+       for _, test := range tests {
+               if s := test.escaper(input); s != test.escaped { // input is passed as the single variadic argument
+                       t.Errorf("%s: want\n\t%q\ngot\n\t%q", test.name, test.escaped, s)
+                       continue // NOTE(review): redundant; this is already the last statement of the loop body
+               }
+       }
+}
+
+func BenchmarkURLEscaper(b *testing.B) { // Times urlEscaper on a URL containing characters such as ':' and '/', which TestURLFilters expects it to encode.
+       for i := 0; i < b.N; i++ {
+               urlEscaper("http://example.com:80/foo?q=bar%20&baz=x+y#frag") // result is discarded
+       }
+}
+
+func BenchmarkURLEscaperNoSpecials(b *testing.B) { // Times urlEscaper on input made only of letters and '.', which TestURLFilters expects to stay unencoded.
+       for i := 0; i < b.N; i++ {
+               urlEscaper("TheQuickBrownFoxJumpsOverTheLazyDog.") // result is discarded
+       }
+}
+
+func BenchmarkURLNormalizer(b *testing.B) { // Times urlNormalizer on input with spaces and a newline, which TestURLFilters expects as %20 and %0a.
+       for i := 0; i < b.N; i++ {
+               urlNormalizer("The quick brown fox jumps over the lazy dog.\n") // result is discarded
+       }
+}
+
+func BenchmarkURLNormalizerNoSpecials(b *testing.B) { // Times urlNormalizer on a URL it should return unchanged (compare the similar URL case in TestURLNormalizer).
+       for i := 0; i < b.N; i++ {
+               urlNormalizer("http://example.com:80/foo?q=bar%20&baz=x+y#frag") // result is discarded
+       }
+}
author	Mike Samuel <mikesamuel@gmail.com>
	Thu, 8 Sep 2011 21:18:20 +0000 (07:18 +1000)
committer	Nigel Tao <nigeltao@golang.org>
	Thu, 8 Sep 2011 21:18:20 +0000 (07:18 +1000)
src/pkg/exp/template/html/Makefile		patch \| blob \| history
src/pkg/exp/template/html/context.go		patch \| blob \| history
src/pkg/exp/template/html/css.go	[new file with mode: 0644]	patch \| blob
src/pkg/exp/template/html/css_test.go	[new file with mode: 0644]	patch \| blob
src/pkg/exp/template/html/escape.go		patch \| blob \| history
src/pkg/exp/template/html/escape_test.go		patch \| blob \| history
src/pkg/exp/template/html/html.go	[new file with mode: 0644]	patch \| blob
src/pkg/exp/template/html/html_test.go	[new file with mode: 0644]	patch \| blob
src/pkg/exp/template/html/js.go		patch \| blob \| history
src/pkg/exp/template/html/url.go	[new file with mode: 0644]	patch \| blob
src/pkg/exp/template/html/url_test.go	[new file with mode: 0644]	patch \| blob