]> Cypherpunks repositories - gostls13.git/commitdiff
exp/html: implement escaping and double-escaping in scripts
authorAndrew Balholm <andybalholm@gmail.com>
Wed, 1 Aug 2012 04:45:35 +0000 (14:45 +1000)
committerNigel Tao <nigeltao@golang.org>
Wed, 1 Aug 2012 04:45:35 +0000 (14:45 +1000)
The text inside <script> tags is not ordinary raw text; there are all sorts
of other complications. This CL implements those complications.

Pass 76 additional tests.

R=nigeltao
CC=golang-dev
https://golang.org/cl/6443070

src/pkg/exp/html/parse_test.go
src/pkg/exp/html/testlogs/scriptdata01.dat.log
src/pkg/exp/html/testlogs/tests16.dat.log
src/pkg/exp/html/token.go

index 18389b27d6f726e6e623bce8470c44f4ffe291bf..2e8dfbf1072fe1436a4fe3d3b60322d81057c78f 100644 (file)
@@ -389,6 +389,23 @@ var renderTestBlacklist = map[string]bool{
        // A <plaintext> element is reparented, putting it before a table.
        // A <plaintext> element can't have anything after it in HTML.
        `<table><plaintext><td>`: true,
+       // A script that ends at EOF may escape its own closing tag when rendered.
+       `<!doctype html><script><!--<script `:          true,
+       `<!doctype html><script><!--<script <a`:        true,
+       `<!doctype html><script><!--<script </script`:  true,
+       `<!doctype html><script><!--<script </scripta`: true,
+       `<!doctype html><script><!--<script -`:         true,
+       `<!doctype html><script><!--<script -a`:        true,
+       `<!doctype html><script><!--<script --`:        true,
+       `<!doctype html><script><!--<script --a`:       true,
+       `<script><!--<script `:                         true,
+       `<script><!--<script <a`:                       true,
+       `<script><!--<script </script`:                 true,
+       `<script><!--<script </scripta`:                true,
+       `<script><!--<script -`:                        true,
+       `<script><!--<script -a`:                       true,
+       `<script><!--<script --`:                       true,
+       `<script><!--<script --a`:                      true,
 }
 
 func TestNodeConsistency(t *testing.T) {
index 85b9284d5100b292157c968e9025110cd0e97ec9..ff74927df457078661ab0112acff9de43d61d043 100644 (file)
@@ -14,13 +14,13 @@ PASS "FOO<script>'<!-->'</script>BAR"
 PASS "FOO<script>'<!-->'</script>BAR"
 PASS "FOO<script>'<!-- potato'</script>BAR"
 PASS "FOO<script>'<!-- <sCrIpt'</script>BAR"
-FAIL "FOO<script type=\"text/plain\">'<!-- <sCrIpt>'</script>BAR"
-FAIL "FOO<script type=\"text/plain\">'<!-- <sCrIpt> -'</script>BAR"
-FAIL "FOO<script type=\"text/plain\">'<!-- <sCrIpt> --'</script>BAR"
+PASS "FOO<script type=\"text/plain\">'<!-- <sCrIpt>'</script>BAR"
+PASS "FOO<script type=\"text/plain\">'<!-- <sCrIpt> -'</script>BAR"
+PASS "FOO<script type=\"text/plain\">'<!-- <sCrIpt> --'</script>BAR"
 PASS "FOO<script>'<!-- <sCrIpt> -->'</script>BAR"
-FAIL "FOO<script type=\"text/plain\">'<!-- <sCrIpt> --!>'</script>BAR"
-FAIL "FOO<script type=\"text/plain\">'<!-- <sCrIpt> -- >'</script>BAR"
-FAIL "FOO<script type=\"text/plain\">'<!-- <sCrIpt '</script>BAR"
-FAIL "FOO<script type=\"text/plain\">'<!-- <sCrIpt/'</script>BAR"
+PASS "FOO<script type=\"text/plain\">'<!-- <sCrIpt> --!>'</script>BAR"
+PASS "FOO<script type=\"text/plain\">'<!-- <sCrIpt> -- >'</script>BAR"
+PASS "FOO<script type=\"text/plain\">'<!-- <sCrIpt '</script>BAR"
+PASS "FOO<script type=\"text/plain\">'<!-- <sCrIpt/'</script>BAR"
 PASS "FOO<script type=\"text/plain\">'<!-- <sCrIpt\\'</script>BAR"
-FAIL "FOO<script type=\"text/plain\">'<!-- <sCrIpt/'</script>BAR</script>QUX"
+PASS "FOO<script type=\"text/plain\">'<!-- <sCrIpt/'</script>BAR</script>QUX"
index 670e6c39afbf33e2786d681f024700884812c277..4f1e2119620e33f9bedf958257c8bbbaed52a450 100644 (file)
@@ -1,19 +1,19 @@
 PASS "<!doctype html><script>"
 PASS "<!doctype html><script>a"
-PARSE "<!doctype html><script><"
-PARSE "<!doctype html><script></"
-PARSE "<!doctype html><script></S"
-PARSE "<!doctype html><script></SC"
-PARSE "<!doctype html><script></SCR"
-PARSE "<!doctype html><script></SCRI"
-PARSE "<!doctype html><script></SCRIP"
+PASS "<!doctype html><script><"
+PASS "<!doctype html><script></"
+PASS "<!doctype html><script></S"
+PASS "<!doctype html><script></SC"
+PASS "<!doctype html><script></SCR"
+PASS "<!doctype html><script></SCRI"
+PASS "<!doctype html><script></SCRIP"
 PASS "<!doctype html><script></SCRIPT"
 PASS "<!doctype html><script></SCRIPT "
-PARSE "<!doctype html><script></s"
-PARSE "<!doctype html><script></sc"
-PARSE "<!doctype html><script></scr"
-PARSE "<!doctype html><script></scri"
-PARSE "<!doctype html><script></scrip"
+PASS "<!doctype html><script></s"
+PASS "<!doctype html><script></sc"
+PASS "<!doctype html><script></scr"
+PASS "<!doctype html><script></scri"
+PASS "<!doctype html><script></scrip"
 PASS "<!doctype html><script></script"
 PASS "<!doctype html><script></script "
 PASS "<!doctype html><script><!"
@@ -22,9 +22,9 @@ PASS "<!doctype html><script><!-"
 PASS "<!doctype html><script><!-a"
 PASS "<!doctype html><script><!--"
 PASS "<!doctype html><script><!--a"
-PARSE "<!doctype html><script><!--<"
+PASS "<!doctype html><script><!--<"
 PASS "<!doctype html><script><!--<a"
-PARSE "<!doctype html><script><!--</"
+PASS "<!doctype html><script><!--</"
 PASS "<!doctype html><script><!--</script"
 PASS "<!doctype html><script><!--</script "
 PASS "<!doctype html><script><!--<s"
@@ -36,16 +36,16 @@ PARSE "<!doctype html><script><!--<script </"
 PARSE "<!doctype html><script><!--<script </s"
 PASS "<!doctype html><script><!--<script </script"
 PASS "<!doctype html><script><!--<script </scripta"
-FAIL "<!doctype html><script><!--<script </script "
-FAIL "<!doctype html><script><!--<script </script>"
-FAIL "<!doctype html><script><!--<script </script/"
-FAIL "<!doctype html><script><!--<script </script <"
-FAIL "<!doctype html><script><!--<script </script <a"
-FAIL "<!doctype html><script><!--<script </script </"
-FAIL "<!doctype html><script><!--<script </script </script"
-FAIL "<!doctype html><script><!--<script </script </script "
-FAIL "<!doctype html><script><!--<script </script </script/"
-FAIL "<!doctype html><script><!--<script </script </script>"
+PASS "<!doctype html><script><!--<script </script "
+PASS "<!doctype html><script><!--<script </script>"
+PASS "<!doctype html><script><!--<script </script/"
+PASS "<!doctype html><script><!--<script </script <"
+PASS "<!doctype html><script><!--<script </script <a"
+PASS "<!doctype html><script><!--<script </script </"
+PASS "<!doctype html><script><!--<script </script </script"
+PASS "<!doctype html><script><!--<script </script </script "
+PASS "<!doctype html><script><!--<script </script </script/"
+PASS "<!doctype html><script><!--<script </script </script>"
 PASS "<!doctype html><script><!--<script -"
 PASS "<!doctype html><script><!--<script -a"
 PARSE "<!doctype html><script><!--<script -<"
@@ -53,23 +53,23 @@ PASS "<!doctype html><script><!--<script --"
 PASS "<!doctype html><script><!--<script --a"
 PARSE "<!doctype html><script><!--<script --<"
 PASS "<!doctype html><script><!--<script -->"
-PARSE "<!doctype html><script><!--<script --><"
-PARSE "<!doctype html><script><!--<script --></"
+PASS "<!doctype html><script><!--<script --><"
+PASS "<!doctype html><script><!--<script --></"
 PASS "<!doctype html><script><!--<script --></script"
 PASS "<!doctype html><script><!--<script --></script "
 PASS "<!doctype html><script><!--<script --></script/"
 PASS "<!doctype html><script><!--<script --></script>"
 PASS "<!doctype html><script><!--<script><\\/script>--></script>"
 PASS "<!doctype html><script><!--<script></scr'+'ipt>--></script>"
-FAIL "<!doctype html><script><!--<script></script><script></script></script>"
-FAIL "<!doctype html><script><!--<script></script><script></script>--><!--</script>"
-FAIL "<!doctype html><script><!--<script></script><script></script>-- ></script>"
-FAIL "<!doctype html><script><!--<script></script><script></script>- -></script>"
-FAIL "<!doctype html><script><!--<script></script><script></script>- - ></script>"
-FAIL "<!doctype html><script><!--<script></script><script></script>-></script>"
-FAIL "<!doctype html><script><!--<script>--!></script>X"
+PASS "<!doctype html><script><!--<script></script><script></script></script>"
+PASS "<!doctype html><script><!--<script></script><script></script>--><!--</script>"
+PASS "<!doctype html><script><!--<script></script><script></script>-- ></script>"
+PASS "<!doctype html><script><!--<script></script><script></script>- -></script>"
+PASS "<!doctype html><script><!--<script></script><script></script>- - ></script>"
+PASS "<!doctype html><script><!--<script></script><script></script>-></script>"
+PASS "<!doctype html><script><!--<script>--!></script>X"
 PASS "<!doctype html><script><!--<scr'+'ipt></script>--></script>"
-FAIL "<!doctype html><script><!--<script></scr'+'ipt></script>X"
+PASS "<!doctype html><script><!--<script></scr'+'ipt></script>X"
 PASS "<!doctype html><style><!--<style></style>--></style>"
 PASS "<!doctype html><style><!--</style>X"
 PASS "<!doctype html><style><!--...</style>...--></style>"
@@ -96,20 +96,20 @@ PASS "<!doctype html><xmp><!--<xmp></xmp>--></xmp>"
 PASS "<!doctype html><noembed><!--<noembed></noembed>--></noembed>"
 PASS "<script>"
 PASS "<script>a"
-PARSE "<script><"
-PARSE "<script></"
-PARSE "<script></S"
-PARSE "<script></SC"
-PARSE "<script></SCR"
-PARSE "<script></SCRI"
-PARSE "<script></SCRIP"
+PASS "<script><"
+PASS "<script></"
+PASS "<script></S"
+PASS "<script></SC"
+PASS "<script></SCR"
+PASS "<script></SCRI"
+PASS "<script></SCRIP"
 PASS "<script></SCRIPT"
 PASS "<script></SCRIPT "
-PARSE "<script></s"
-PARSE "<script></sc"
-PARSE "<script></scr"
-PARSE "<script></scri"
-PARSE "<script></scrip"
+PASS "<script></s"
+PASS "<script></sc"
+PASS "<script></scr"
+PASS "<script></scri"
+PASS "<script></scrip"
 PASS "<script></script"
 PASS "<script></script "
 PASS "<script><!"
@@ -118,9 +118,9 @@ PASS "<script><!-"
 PASS "<script><!-a"
 PASS "<script><!--"
 PASS "<script><!--a"
-PARSE "<script><!--<"
+PASS "<script><!--<"
 PASS "<script><!--<a"
-PARSE "<script><!--</"
+PASS "<script><!--</"
 PASS "<script><!--</script"
 PASS "<script><!--</script "
 PASS "<script><!--<s"
@@ -132,38 +132,38 @@ PARSE "<script><!--<script </"
 PARSE "<script><!--<script </s"
 PASS "<script><!--<script </script"
 PASS "<script><!--<script </scripta"
-FAIL "<script><!--<script </script "
-FAIL "<script><!--<script </script>"
-FAIL "<script><!--<script </script/"
-FAIL "<script><!--<script </script <"
-FAIL "<script><!--<script </script <a"
-FAIL "<script><!--<script </script </"
-FAIL "<script><!--<script </script </script"
-FAIL "<script><!--<script </script </script "
-FAIL "<script><!--<script </script </script/"
-FAIL "<script><!--<script </script </script>"
+PASS "<script><!--<script </script "
+PASS "<script><!--<script </script>"
+PASS "<script><!--<script </script/"
+PASS "<script><!--<script </script <"
+PASS "<script><!--<script </script <a"
+PASS "<script><!--<script </script </"
+PASS "<script><!--<script </script </script"
+PASS "<script><!--<script </script </script "
+PASS "<script><!--<script </script </script/"
+PASS "<script><!--<script </script </script>"
 PASS "<script><!--<script -"
 PASS "<script><!--<script -a"
 PASS "<script><!--<script --"
 PASS "<script><!--<script --a"
 PASS "<script><!--<script -->"
-PARSE "<script><!--<script --><"
-PARSE "<script><!--<script --></"
+PASS "<script><!--<script --><"
+PASS "<script><!--<script --></"
 PASS "<script><!--<script --></script"
 PASS "<script><!--<script --></script "
 PASS "<script><!--<script --></script/"
 PASS "<script><!--<script --></script>"
 PASS "<script><!--<script><\\/script>--></script>"
 PASS "<script><!--<script></scr'+'ipt>--></script>"
-FAIL "<script><!--<script></script><script></script></script>"
-FAIL "<script><!--<script></script><script></script>--><!--</script>"
-FAIL "<script><!--<script></script><script></script>-- ></script>"
-FAIL "<script><!--<script></script><script></script>- -></script>"
-FAIL "<script><!--<script></script><script></script>- - ></script>"
-FAIL "<script><!--<script></script><script></script>-></script>"
-FAIL "<script><!--<script>--!></script>X"
+PASS "<script><!--<script></script><script></script></script>"
+PASS "<script><!--<script></script><script></script>--><!--</script>"
+PASS "<script><!--<script></script><script></script>-- ></script>"
+PASS "<script><!--<script></script><script></script>- -></script>"
+PASS "<script><!--<script></script><script></script>- - ></script>"
+PASS "<script><!--<script></script><script></script>-></script>"
+PASS "<script><!--<script>--!></script>X"
 PASS "<script><!--<scr'+'ipt></script>--></script>"
-FAIL "<script><!--<script></scr'+'ipt></script>X"
+PASS "<script><!--<script></scr'+'ipt></script>X"
 PASS "<style><!--<style></style>--></style>"
 PASS "<style><!--</style>X"
 PASS "<style><!--...</style>...--></style>"
index 7e431c21efa8f00358045a757fb01d66fee1cf29..7ee0efc669bb57aa4151c4ffd55b22be0ea3e4cc 100644 (file)
@@ -241,6 +241,12 @@ func (z *Tokenizer) skipWhiteSpace() {
 // readRawOrRCDATA reads until the next "</foo>", where "foo" is z.rawTag and
 // is typically something like "script" or "textarea".
 func (z *Tokenizer) readRawOrRCDATA() {
+       if z.rawTag == "script" {
+               z.readScript()
+               z.textIsRaw = true
+               z.rawTag = ""
+               return
+       }
 loop:
        for {
                c := z.readByte()
@@ -257,27 +263,8 @@ loop:
                if c != '/' {
                        continue loop
                }
-               for i := 0; i < len(z.rawTag); i++ {
-                       c = z.readByte()
-                       if z.err != nil {
-                               break loop
-                       }
-                       if c != z.rawTag[i] && c != z.rawTag[i]-('a'-'A') {
-                               continue loop
-                       }
-               }
-               c = z.readByte()
-               if z.err != nil {
-                       break loop
-               }
-               switch c {
-               case ' ', '\n', '\r', '\t', '\f', '/', '>':
-                       // The 3 is 2 for the leading "</" plus 1 for the trailing character c.
-                       z.raw.end -= 3 + len(z.rawTag)
+               if z.readRawEndTag() || z.err != nil {
                        break loop
-               case '<':
-                       // Step back one, to catch "</foo</foo>".
-                       z.raw.end--
                }
        }
        z.data.end = z.raw.end
@@ -286,6 +273,242 @@ loop:
        z.rawTag = ""
 }
 
+// readRawEndTag attempts to read a tag like "</foo>", where "foo" is z.rawTag.
+// If it succeeds, it backs up the input position to reconsume the tag and 
+// returns true. Otherwise it returns false. The opening "</" has already been
+// consumed.
+func (z *Tokenizer) readRawEndTag() bool {
+       for i := 0; i < len(z.rawTag); i++ {
+               c := z.readByte()
+               if z.err != nil {
+                       return false
+               }
+               if c != z.rawTag[i] && c != z.rawTag[i]-('a'-'A') {
+                       z.raw.end--
+                       return false
+               }
+       }
+       c := z.readByte()
+       if z.err != nil {
+               return false
+       }
+       switch c {
+       case ' ', '\n', '\r', '\t', '\f', '/', '>':
+               // The 3 is 2 for the leading "</" plus 1 for the trailing character c.
+               z.raw.end -= 3 + len(z.rawTag)
+               return true
+       }
+       z.raw.end--
+       return false
+}
+
+// readScript reads until the next </script> tag, following the byzantine
+// rules for escaping/hiding the closing tag.
+func (z *Tokenizer) readScript() {
+       defer func() {
+               z.data.end = z.raw.end
+       }()
+       var c byte
+
+scriptData:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       if c == '<' {
+               goto scriptDataLessThanSign
+       }
+       goto scriptData
+
+scriptDataLessThanSign:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       switch c {
+       case '/':
+               goto scriptDataEndTagOpen
+       case '!':
+               goto scriptDataEscapeStart
+       }
+       z.raw.end--
+       goto scriptData
+
+scriptDataEndTagOpen:
+       if z.readRawEndTag() || z.err != nil {
+               return
+       }
+       goto scriptData
+
+scriptDataEscapeStart:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       if c == '-' {
+               goto scriptDataEscapeStartDash
+       }
+       z.raw.end--
+       goto scriptData
+
+scriptDataEscapeStartDash:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       if c == '-' {
+               goto scriptDataEscapedDashDash
+       }
+       z.raw.end--
+       goto scriptData
+
+scriptDataEscaped:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       switch c {
+       case '-':
+               goto scriptDataEscapedDash
+       case '<':
+               goto scriptDataEscapedLessThanSign
+       }
+       goto scriptDataEscaped
+
+scriptDataEscapedDash:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       switch c {
+       case '-':
+               goto scriptDataEscapedDashDash
+       case '<':
+               goto scriptDataEscapedLessThanSign
+       }
+       goto scriptDataEscaped
+
+scriptDataEscapedDashDash:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       switch c {
+       case '-':
+               goto scriptDataEscapedDashDash
+       case '<':
+               goto scriptDataEscapedLessThanSign
+       case '>':
+               goto scriptData
+       }
+       goto scriptDataEscaped
+
+scriptDataEscapedLessThanSign:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       if c == '/' {
+               goto scriptDataEscapedEndTagOpen
+       }
+       if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
+               goto scriptDataDoubleEscapeStart
+       }
+       z.raw.end--
+       goto scriptData
+
+scriptDataEscapedEndTagOpen:
+       if z.readRawEndTag() || z.err != nil {
+               return
+       }
+       goto scriptDataEscaped
+
+scriptDataDoubleEscapeStart:
+       z.raw.end--
+       for i := 0; i < len("script"); i++ {
+               c = z.readByte()
+               if z.err != nil {
+                       return
+               }
+               if c != "script"[i] && c != "SCRIPT"[i] {
+                       z.raw.end--
+                       goto scriptDataEscaped
+               }
+       }
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       switch c {
+       case ' ', '\n', '\r', '\t', '\f', '/', '>':
+               goto scriptDataDoubleEscaped
+       }
+       z.raw.end--
+       goto scriptDataEscaped
+
+scriptDataDoubleEscaped:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       switch c {
+       case '-':
+               goto scriptDataDoubleEscapedDash
+       case '<':
+               goto scriptDataDoubleEscapedLessThanSign
+       }
+       goto scriptDataDoubleEscaped
+
+scriptDataDoubleEscapedDash:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       switch c {
+       case '-':
+               goto scriptDataDoubleEscapedDashDash
+       case '<':
+               goto scriptDataDoubleEscapedLessThanSign
+       }
+       goto scriptDataDoubleEscaped
+
+scriptDataDoubleEscapedDashDash:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       switch c {
+       case '-':
+               goto scriptDataDoubleEscapedDashDash
+       case '<':
+               goto scriptDataDoubleEscapedLessThanSign
+       case '>':
+               goto scriptData
+       }
+       goto scriptDataDoubleEscaped
+
+scriptDataDoubleEscapedLessThanSign:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       if c == '/' {
+               goto scriptDataDoubleEscapeEnd
+       }
+       z.raw.end--
+       goto scriptDataDoubleEscaped
+
+scriptDataDoubleEscapeEnd:
+       if z.readRawEndTag() {
+               z.raw.end += len("</script>")
+               goto scriptDataEscaped
+       }
+       if z.err != nil {
+               return
+       }
+       goto scriptDataDoubleEscaped
+}
+
 // readComment reads the next comment token starting with "<!--". The opening
 // "<!--" has already been consumed.
 func (z *Tokenizer) readComment() {