PASS "<!doctype html><script>"
PASS "<!doctype html><script>a"
-PARSE "<!doctype html><script><"
-PARSE "<!doctype html><script></"
-PARSE "<!doctype html><script></S"
-PARSE "<!doctype html><script></SC"
-PARSE "<!doctype html><script></SCR"
-PARSE "<!doctype html><script></SCRI"
-PARSE "<!doctype html><script></SCRIP"
+PASS "<!doctype html><script><"
+PASS "<!doctype html><script></"
+PASS "<!doctype html><script></S"
+PASS "<!doctype html><script></SC"
+PASS "<!doctype html><script></SCR"
+PASS "<!doctype html><script></SCRI"
+PASS "<!doctype html><script></SCRIP"
PASS "<!doctype html><script></SCRIPT"
PASS "<!doctype html><script></SCRIPT "
-PARSE "<!doctype html><script></s"
-PARSE "<!doctype html><script></sc"
-PARSE "<!doctype html><script></scr"
-PARSE "<!doctype html><script></scri"
-PARSE "<!doctype html><script></scrip"
+PASS "<!doctype html><script></s"
+PASS "<!doctype html><script></sc"
+PASS "<!doctype html><script></scr"
+PASS "<!doctype html><script></scri"
+PASS "<!doctype html><script></scrip"
PASS "<!doctype html><script></script"
PASS "<!doctype html><script></script "
PASS "<!doctype html><script><!"
PASS "<!doctype html><script><!-a"
PASS "<!doctype html><script><!--"
PASS "<!doctype html><script><!--a"
-PARSE "<!doctype html><script><!--<"
+PASS "<!doctype html><script><!--<"
PASS "<!doctype html><script><!--<a"
-PARSE "<!doctype html><script><!--</"
+PASS "<!doctype html><script><!--</"
PASS "<!doctype html><script><!--</script"
PASS "<!doctype html><script><!--</script "
PASS "<!doctype html><script><!--<s"
PARSE "<!doctype html><script><!--<script </s"
PASS "<!doctype html><script><!--<script </script"
PASS "<!doctype html><script><!--<script </scripta"
-FAIL "<!doctype html><script><!--<script </script "
-FAIL "<!doctype html><script><!--<script </script>"
-FAIL "<!doctype html><script><!--<script </script/"
-FAIL "<!doctype html><script><!--<script </script <"
-FAIL "<!doctype html><script><!--<script </script <a"
-FAIL "<!doctype html><script><!--<script </script </"
-FAIL "<!doctype html><script><!--<script </script </script"
-FAIL "<!doctype html><script><!--<script </script </script "
-FAIL "<!doctype html><script><!--<script </script </script/"
-FAIL "<!doctype html><script><!--<script </script </script>"
+PASS "<!doctype html><script><!--<script </script "
+PASS "<!doctype html><script><!--<script </script>"
+PASS "<!doctype html><script><!--<script </script/"
+PASS "<!doctype html><script><!--<script </script <"
+PASS "<!doctype html><script><!--<script </script <a"
+PASS "<!doctype html><script><!--<script </script </"
+PASS "<!doctype html><script><!--<script </script </script"
+PASS "<!doctype html><script><!--<script </script </script "
+PASS "<!doctype html><script><!--<script </script </script/"
+PASS "<!doctype html><script><!--<script </script </script>"
PASS "<!doctype html><script><!--<script -"
PASS "<!doctype html><script><!--<script -a"
PARSE "<!doctype html><script><!--<script -<"
PASS "<!doctype html><script><!--<script --a"
PARSE "<!doctype html><script><!--<script --<"
PASS "<!doctype html><script><!--<script -->"
-PARSE "<!doctype html><script><!--<script --><"
-PARSE "<!doctype html><script><!--<script --></"
+PASS "<!doctype html><script><!--<script --><"
+PASS "<!doctype html><script><!--<script --></"
PASS "<!doctype html><script><!--<script --></script"
PASS "<!doctype html><script><!--<script --></script "
PASS "<!doctype html><script><!--<script --></script/"
PASS "<!doctype html><script><!--<script --></script>"
PASS "<!doctype html><script><!--<script><\\/script>--></script>"
PASS "<!doctype html><script><!--<script></scr'+'ipt>--></script>"
-FAIL "<!doctype html><script><!--<script></script><script></script></script>"
-FAIL "<!doctype html><script><!--<script></script><script></script>--><!--</script>"
-FAIL "<!doctype html><script><!--<script></script><script></script>-- ></script>"
-FAIL "<!doctype html><script><!--<script></script><script></script>- -></script>"
-FAIL "<!doctype html><script><!--<script></script><script></script>- - ></script>"
-FAIL "<!doctype html><script><!--<script></script><script></script>-></script>"
-FAIL "<!doctype html><script><!--<script>--!></script>X"
+PASS "<!doctype html><script><!--<script></script><script></script></script>"
+PASS "<!doctype html><script><!--<script></script><script></script>--><!--</script>"
+PASS "<!doctype html><script><!--<script></script><script></script>-- ></script>"
+PASS "<!doctype html><script><!--<script></script><script></script>- -></script>"
+PASS "<!doctype html><script><!--<script></script><script></script>- - ></script>"
+PASS "<!doctype html><script><!--<script></script><script></script>-></script>"
+PASS "<!doctype html><script><!--<script>--!></script>X"
PASS "<!doctype html><script><!--<scr'+'ipt></script>--></script>"
-FAIL "<!doctype html><script><!--<script></scr'+'ipt></script>X"
+PASS "<!doctype html><script><!--<script></scr'+'ipt></script>X"
PASS "<!doctype html><style><!--<style></style>--></style>"
PASS "<!doctype html><style><!--</style>X"
PASS "<!doctype html><style><!--...</style>...--></style>"
PASS "<!doctype html><noembed><!--<noembed></noembed>--></noembed>"
PASS "<script>"
PASS "<script>a"
-PARSE "<script><"
-PARSE "<script></"
-PARSE "<script></S"
-PARSE "<script></SC"
-PARSE "<script></SCR"
-PARSE "<script></SCRI"
-PARSE "<script></SCRIP"
+PASS "<script><"
+PASS "<script></"
+PASS "<script></S"
+PASS "<script></SC"
+PASS "<script></SCR"
+PASS "<script></SCRI"
+PASS "<script></SCRIP"
PASS "<script></SCRIPT"
PASS "<script></SCRIPT "
-PARSE "<script></s"
-PARSE "<script></sc"
-PARSE "<script></scr"
-PARSE "<script></scri"
-PARSE "<script></scrip"
+PASS "<script></s"
+PASS "<script></sc"
+PASS "<script></scr"
+PASS "<script></scri"
+PASS "<script></scrip"
PASS "<script></script"
PASS "<script></script "
PASS "<script><!"
PASS "<script><!-a"
PASS "<script><!--"
PASS "<script><!--a"
-PARSE "<script><!--<"
+PASS "<script><!--<"
PASS "<script><!--<a"
-PARSE "<script><!--</"
+PASS "<script><!--</"
PASS "<script><!--</script"
PASS "<script><!--</script "
PASS "<script><!--<s"
PARSE "<script><!--<script </s"
PASS "<script><!--<script </script"
PASS "<script><!--<script </scripta"
-FAIL "<script><!--<script </script "
-FAIL "<script><!--<script </script>"
-FAIL "<script><!--<script </script/"
-FAIL "<script><!--<script </script <"
-FAIL "<script><!--<script </script <a"
-FAIL "<script><!--<script </script </"
-FAIL "<script><!--<script </script </script"
-FAIL "<script><!--<script </script </script "
-FAIL "<script><!--<script </script </script/"
-FAIL "<script><!--<script </script </script>"
+PASS "<script><!--<script </script "
+PASS "<script><!--<script </script>"
+PASS "<script><!--<script </script/"
+PASS "<script><!--<script </script <"
+PASS "<script><!--<script </script <a"
+PASS "<script><!--<script </script </"
+PASS "<script><!--<script </script </script"
+PASS "<script><!--<script </script </script "
+PASS "<script><!--<script </script </script/"
+PASS "<script><!--<script </script </script>"
PASS "<script><!--<script -"
PASS "<script><!--<script -a"
PASS "<script><!--<script --"
PASS "<script><!--<script --a"
PASS "<script><!--<script -->"
-PARSE "<script><!--<script --><"
-PARSE "<script><!--<script --></"
+PASS "<script><!--<script --><"
+PASS "<script><!--<script --></"
PASS "<script><!--<script --></script"
PASS "<script><!--<script --></script "
PASS "<script><!--<script --></script/"
PASS "<script><!--<script --></script>"
PASS "<script><!--<script><\\/script>--></script>"
PASS "<script><!--<script></scr'+'ipt>--></script>"
-FAIL "<script><!--<script></script><script></script></script>"
-FAIL "<script><!--<script></script><script></script>--><!--</script>"
-FAIL "<script><!--<script></script><script></script>-- ></script>"
-FAIL "<script><!--<script></script><script></script>- -></script>"
-FAIL "<script><!--<script></script><script></script>- - ></script>"
-FAIL "<script><!--<script></script><script></script>-></script>"
-FAIL "<script><!--<script>--!></script>X"
+PASS "<script><!--<script></script><script></script></script>"
+PASS "<script><!--<script></script><script></script>--><!--</script>"
+PASS "<script><!--<script></script><script></script>-- ></script>"
+PASS "<script><!--<script></script><script></script>- -></script>"
+PASS "<script><!--<script></script><script></script>- - ></script>"
+PASS "<script><!--<script></script><script></script>-></script>"
+PASS "<script><!--<script>--!></script>X"
PASS "<script><!--<scr'+'ipt></script>--></script>"
-FAIL "<script><!--<script></scr'+'ipt></script>X"
+PASS "<script><!--<script></scr'+'ipt></script>X"
PASS "<style><!--<style></style>--></style>"
PASS "<style><!--</style>X"
PASS "<style><!--...</style>...--></style>"
// readRawOrRCDATA reads until the next "</foo>", where "foo" is z.rawTag and
// is typically something like "script" or "textarea".
func (z *Tokenizer) readRawOrRCDATA() {
+ if z.rawTag == "script" { // script content is scanned by readScript instead of the generic loop below
+ z.readScript()
+ z.textIsRaw = true // NOTE(review): presumably tells later stages not to unescape this text — confirm against the field's users
+ z.rawTag = "" // same reset the generic path performs before returning
+ return // z.data.end has already been set by readScript's deferred assignment
+ }
loop:
for {
c := z.readByte()
if c != '/' {
continue loop
}
- for i := 0; i < len(z.rawTag); i++ {
- c = z.readByte()
- if z.err != nil {
- break loop
- }
- if c != z.rawTag[i] && c != z.rawTag[i]-('a'-'A') {
- continue loop
- }
- }
- c = z.readByte()
- if z.err != nil {
- break loop
- }
- switch c {
- case ' ', '\n', '\r', '\t', '\f', '/', '>':
- // The 3 is 2 for the leading "</" plus 1 for the trailing character c.
- z.raw.end -= 3 + len(z.rawTag)
+ if z.readRawEndTag() || z.err != nil { // stop once the end tag for z.rawTag is matched, or on a read error
break loop
- case '<':
- // Step back one, to catch "</foo</foo>".
- z.raw.end--
}
}
z.data.end = z.raw.end
z.rawTag = ""
}
+// readRawEndTag attempts to read the rest of an end tag "</foo", where "foo"
+// is z.rawTag and the opening "</" has already been consumed. On success it
+// backs the input position up to the "<" so the tag is reconsumed and returns
+// true; on a mismatch or read error (z.err != nil) it returns false.
+func (z *Tokenizer) readRawEndTag() bool {
+ for i := 0; i < len(z.rawTag); i++ {
+ c := z.readByte()
+ if z.err != nil {
+ return false // read error: z.raw.end is not adjusted on this path
+ }
+ if c != z.rawTag[i] && c != z.rawTag[i]-('a'-'A') { // also accept the upper-case form; assumes z.rawTag is lower-case ASCII
+ z.raw.end-- // step back over the mismatching byte so the caller reads it again
+ return false
+ }
+ }
+ c := z.readByte() // the byte after the name must be a delimiter for the tag to count
+ if z.err != nil {
+ return false
+ }
+ switch c {
+ case ' ', '\n', '\r', '\t', '\f', '/', '>':
+ // The 3 is 2 for the leading "</" plus 1 for the trailing character c.
+ z.raw.end -= 3 + len(z.rawTag)
+ return true
+ }
+ z.raw.end-- // name matched but no delimiter follows (e.g. "</scripta"); step back over that byte
+ return false
+}
+
+// readScript reads until the next </script> tag, following the byzantine
+// rules for escaping/hiding the closing tag ("<!--" and nested "<script").
+func (z *Tokenizer) readScript() {
+ defer func() { // runs on every return below, whether an end tag was found or a read failed
+ z.data.end = z.raw.end
+ }()
+ var c byte // declared before the first label so that no goto jumps over a variable declaration
+
+scriptData: // default state: skip bytes until a '<'
+ c = z.readByte()
+ if z.err != nil {
+ return
+ }
+ if c == '<' {
+ goto scriptDataLessThanSign
+ }
+ goto scriptData
+
+scriptDataLessThanSign: // seen "<"
+ c = z.readByte()
+ if z.err != nil {
+ return
+ }
+ switch c {
+ case '/':
+ goto scriptDataEndTagOpen // "</": possible end tag
+ case '!':
+ goto scriptDataEscapeStart // "<!": possible start of "<!--"
+ }
+ z.raw.end-- // step back one so this byte is read again in scriptData
+ goto scriptData
+
+scriptDataEndTagOpen: // seen "</"
+ if z.readRawEndTag() || z.err != nil { // end tag for z.rawTag found (position rewound to its "<"), or read error
+ return
+ }
+ goto scriptData
+
+scriptDataEscapeStart: // seen "<!"
+ c = z.readByte()
+ if z.err != nil {
+ return
+ }
+ if c == '-' {
+ goto scriptDataEscapeStartDash
+ }
+ z.raw.end--
+ goto scriptData
+
+scriptDataEscapeStartDash: // seen "<!-"
+ c = z.readByte()
+ if z.err != nil {
+ return
+ }
+ if c == '-' {
+ goto scriptDataEscapedDashDash // "<!--" seen; its two dashes already count toward a closing "-->"
+ }
+ z.raw.end--
+ goto scriptData
+
+scriptDataEscaped: // inside "<!--", no pending dash
+ c = z.readByte()
+ if z.err != nil {
+ return
+ }
+ switch c {
+ case '-':
+ goto scriptDataEscapedDash
+ case '<':
+ goto scriptDataEscapedLessThanSign
+ }
+ goto scriptDataEscaped
+
+scriptDataEscapedDash: // inside "<!--", previous byte was a single '-'
+ c = z.readByte()
+ if z.err != nil {
+ return
+ }
+ switch c {
+ case '-':
+ goto scriptDataEscapedDashDash
+ case '<':
+ goto scriptDataEscapedLessThanSign
+ }
+ goto scriptDataEscaped
+
+scriptDataEscapedDashDash: // inside "<!--", previous two or more bytes were '-'
+ c = z.readByte()
+ if z.err != nil {
+ return
+ }
+ switch c {
+ case '-':
+ goto scriptDataEscapedDashDash
+ case '<':
+ goto scriptDataEscapedLessThanSign
+ case '>':
+ goto scriptData // "-->" ends the escaped section
+ }
+ goto scriptDataEscaped
+
+scriptDataEscapedLessThanSign: // seen '<' inside "<!--"
+ c = z.readByte()
+ if z.err != nil {
+ return
+ }
+ if c == '/' {
+ goto scriptDataEscapedEndTagOpen
+ }
+ if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' { // an ASCII letter may begin a nested "<script"
+ goto scriptDataDoubleEscapeStart
+ }
+ z.raw.end--
+ goto scriptData // NOTE(review): resumes in scriptData, not scriptDataEscaped, so this leaves the escaped section — confirm intended
+
+scriptDataEscapedEndTagOpen: // seen "</" inside "<!--"
+ if z.readRawEndTag() || z.err != nil { // an end tag here still ends the script
+ return
+ }
+ goto scriptDataEscaped
+
+scriptDataDoubleEscapeStart: // seen '<' plus a letter inside "<!--"; check for "<script" + delimiter
+ z.raw.end-- // step back so that letter is compared as the first byte of "script" below
+ for i := 0; i < len("script"); i++ {
+ c = z.readByte()
+ if z.err != nil {
+ return
+ }
+ if c != "script"[i] && c != "SCRIPT"[i] { // byte-wise match, either case
+ z.raw.end-- // step back over the mismatching byte; remain singly escaped
+ goto scriptDataEscaped
+ }
+ }
+ c = z.readByte()
+ if z.err != nil {
+ return
+ }
+ switch c {
+ case ' ', '\n', '\r', '\t', '\f', '/', '>':
+ goto scriptDataDoubleEscaped // nested "<script" confirmed
+ }
+ z.raw.end--
+ goto scriptDataEscaped
+
+scriptDataDoubleEscaped: // inside "<!--" and after a nested "<script"
+ c = z.readByte()
+ if z.err != nil {
+ return
+ }
+ switch c {
+ case '-':
+ goto scriptDataDoubleEscapedDash
+ case '<':
+ goto scriptDataDoubleEscapedLessThanSign
+ }
+ goto scriptDataDoubleEscaped
+
+scriptDataDoubleEscapedDash: // double escaped, previous byte was a single '-'
+ c = z.readByte()
+ if z.err != nil {
+ return
+ }
+ switch c {
+ case '-':
+ goto scriptDataDoubleEscapedDashDash
+ case '<':
+ goto scriptDataDoubleEscapedLessThanSign
+ }
+ goto scriptDataDoubleEscaped
+
+scriptDataDoubleEscapedDashDash: // double escaped, previous two or more bytes were '-'
+ c = z.readByte()
+ if z.err != nil {
+ return
+ }
+ switch c {
+ case '-':
+ goto scriptDataDoubleEscapedDashDash
+ case '<':
+ goto scriptDataDoubleEscapedLessThanSign
+ case '>':
+ goto scriptData // "-->" drops straight back to unescaped script data
+ }
+ goto scriptDataDoubleEscaped
+
+scriptDataDoubleEscapedLessThanSign: // seen '<' while double escaped; only "</" is of interest
+ c = z.readByte()
+ if z.err != nil {
+ return
+ }
+ if c == '/' {
+ goto scriptDataDoubleEscapeEnd
+ }
+ z.raw.end--
+ goto scriptDataDoubleEscaped
+
+scriptDataDoubleEscapeEnd: // seen "</" while double escaped
+ if z.readRawEndTag() {
+ z.raw.end += len("</script>") // undo readRawEndTag's rewind (3+len("script") bytes): this tag only closes the nested script
+ goto scriptDataEscaped
+ }
+ if z.err != nil {
+ return
+ }
+ goto scriptDataDoubleEscaped
+}
+
// readComment reads the next comment token starting with "<!--". The opening
// "<!--" has already been consumed.
func (z *Tokenizer) readComment() {