From: Didier Spezia Date: Thu, 14 May 2015 22:36:59 +0000 (+0000) Subject: html/template: fix string iteration in replacement operations X-Git-Tag: go1.5beta1~522 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=a1c1a763bc7c8d10ec30a7fa60ecf7d5f9a6f1c8;p=gostls13.git html/template: fix string iteration in replacement operations In css, js, and html, the replacement operations are implemented by iterating on strings (rune by rune). The for/range statement is used. The length of the rune is required and added to the index to properly slice the string. This is potentially wrong because there is a discrepancy between the result of utf8.RuneLen and the increment of the index (set by the for/range statement). For invalid strings, utf8.RuneLen('\ufffd') == 3, while the index is incremented only by 1 byte. htmlReplacer triggers a panic at slicing time for some invalid strings. Use a more robust iteration mechanism based on utf8.DecodeRuneInString, and make sure the same pattern is used for all similar functions in this package. Fixes #10799 Change-Id: Ibad3857b2819435d9fa564f06fc2ca8774102841 Reviewed-on: https://go-review.googlesource.com/10105 Reviewed-by: Rob Pike --- diff --git a/src/html/template/css.go b/src/html/template/css.go index 634f183f79..318464835f 100644 --- a/src/html/template/css.go +++ b/src/html/template/css.go @@ -157,56 +157,20 @@ func isCSSSpace(b byte) bool { func cssEscaper(args ...interface{}) string { s, _ := stringify(args...) var b bytes.Buffer - written := 0 - for i, r := range s { + r, w, written := rune(0), 0, 0 + for i := 0; i < len(s); i += w { + // See comment in htmlEscaper. + r, w = utf8.DecodeRuneInString(s[i:]) var repl string - switch r { - case 0: - repl = `\0` - case '\t': - repl = `\9` - case '\n': - repl = `\a` - case '\f': - repl = `\c` - case '\r': - repl = `\d` - // Encode HTML specials as hex so the output can be embedded - // in HTML attributes without further encoding. - case '"': - repl = `\22` - case '&': - repl = `\26` - case '\'': - repl = `\27` - case '(': - repl = `\28` - case ')': - repl = `\29` - case '+': - repl = `\2b` - case '/': - repl = `\2f` - case ':': - repl = `\3a` - case ';': - repl = `\3b` - case '<': - repl = `\3c` - case '>': - repl = `\3e` - case '\\': - repl = `\\` - case '{': - repl = `\7b` - case '}': - repl = `\7d` + switch { + case int(r) < len(cssReplacementTable) && cssReplacementTable[r] != "": + repl = cssReplacementTable[r] default: continue } b.WriteString(s[written:i]) b.WriteString(repl) - written = i + utf8.RuneLen(r) + written = i + w if repl != `\\` && (written == len(s) || isHex(s[written]) || isCSSSpace(s[written])) { b.WriteByte(' ') } @@ -218,6 +182,30 @@ func cssEscaper(args ...interface{}) string { return b.String() } +var cssReplacementTable = []string{ + 0: `\0`, + '\t': `\9`, + '\n': `\a`, + '\f': `\c`, + '\r': `\d`, + // Encode HTML specials as hex so the output can be embedded + // in HTML attributes without further encoding. + '"': `\22`, + '&': `\26`, + '\'': `\27`, + '(': `\28`, + ')': `\29`, + '+': `\2b`, + '/': `\2f`, + ':': `\3a`, + ';': `\3b`, + '<': `\3c`, + '>': `\3e`, + '\\': `\\`, + '{': `\7b`, + '}': `\7d`, +} + var expressionBytes = []byte("expression") var mozBindingBytes = []byte("mozbinding") diff --git a/src/html/template/html.go b/src/html/template/html.go index 9c069efd1d..de4aa4abb2 100644 --- a/src/html/template/html.go +++ b/src/html/template/html.go @@ -138,21 +138,24 @@ var htmlNospaceNormReplacementTable = []string{ // and when badRunes is true, certain bad runes are allowed through unescaped. func htmlReplacer(s string, replacementTable []string, badRunes bool) string { written, b := 0, new(bytes.Buffer) - for i, r := range s { + r, w := rune(0), 0 + for i := 0; i < len(s); i += w { + // Cannot use 'for range s' because we need to preserve the width + // of the runes in the input. If we see a decoding error, the input + // width will not be utf8.Runelen(r) and we will overrun the buffer. + r, w = utf8.DecodeRuneInString(s[i:]) if int(r) < len(replacementTable) { if repl := replacementTable[r]; len(repl) != 0 { b.WriteString(s[written:i]) b.WriteString(repl) - // Valid as long as replacementTable doesn't - // include anything above 0x7f. - written = i + utf8.RuneLen(r) + written = i + w } } else if badRunes { // No-op. // IE does not allow these ranges in unquoted attrs. } else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff { fmt.Fprintf(b, "%s&#x%x;", s[written:i], r) - written = i + utf8.RuneLen(r) + written = i + w } } if written == 0 { diff --git a/src/html/template/html_test.go b/src/html/template/html_test.go index b9b9703875..f04ee04c27 100644 --- a/src/html/template/html_test.go +++ b/src/html/template/html_test.go @@ -19,7 +19,8 @@ func TestHTMLNospaceEscaper(t *testing.T) { `PQRSTUVWXYZ[\]^_` + "`abcdefghijklmno" + "pqrstuvwxyz{|}~\x7f" + - "\u00A0\u0100\u2028\u2029\ufeff\ufdec\U0001D11E") + "\u00A0\u0100\u2028\u2029\ufeff\ufdec\U0001D11E" + + "erroneous\x960") // keep at the end want := ("�\x01\x02\x03\x04\x05\x06\x07" + "\x08 \x0E\x0F" + @@ -31,14 +32,16 @@ func TestHTMLNospaceEscaper(t *testing.T) { `PQRSTUVWXYZ[\]^_` + ``abcdefghijklmno` + `pqrstuvwxyz{|}~` + "\u007f" + - "\u00A0\u0100\u2028\u2029\ufeff﷬\U0001D11E") + "\u00A0\u0100\u2028\u2029\ufeff﷬\U0001D11E" + + "erroneous�0") // keep at the end got := htmlNospaceEscaper(input) if got != want { t.Errorf("encode: want\n\t%q\nbut got\n\t%q", want, got) } - got, want = html.UnescapeString(got), strings.Replace(input, "\x00", "\ufffd", 1) + r := strings.NewReplacer("\x00", "\ufffd", "\x96", "\ufffd") + got, want = html.UnescapeString(got), r.Replace(input) if want != got { t.Errorf("decode: want\n\t%q\nbut got\n\t%q", want, got) } diff --git a/src/html/template/js.go b/src/html/template/js.go index 999a61ed07..f6d166b311 100644 --- a/src/html/template/js.go +++ b/src/html/template/js.go @@ -246,8 +246,10 @@ func jsRegexpEscaper(args ...interface{}) string { // `\u2029`. func replace(s string, replacementTable []string) string { var b bytes.Buffer - written := 0 - for i, r := range s { + r, w, written := rune(0), 0, 0 + for i := 0; i < len(s); i += w { + // See comment in htmlEscaper. + r, w = utf8.DecodeRuneInString(s[i:]) var repl string switch { case int(r) < len(replacementTable) && replacementTable[r] != "": @@ -261,7 +263,7 @@ func replace(s string, replacementTable []string) string { } b.WriteString(s[written:i]) b.WriteString(repl) - written = i + utf8.RuneLen(r) + written = i + w } if written == 0 { return s