]> Cypherpunks repositories - gostls13.git/commitdiff
exp/html: discard tags that are terminated by EOF instead of by '>'
authorAndrew Balholm <andybalholm@gmail.com>
Mon, 13 Aug 2012 02:07:44 +0000 (12:07 +1000)
committerNigel Tao <nigeltao@golang.org>
Mon, 13 Aug 2012 02:07:44 +0000 (12:07 +1000)
If a tag doesn't have a closing '>', it isn't considered a tag;
it is just ignored and EOF is returned instead.

Pass one additional test in the test suite.

Change tokenizer tests to match correct behavior.

R=nigeltao
CC=golang-dev
https://golang.org/cl/6454131

src/pkg/exp/html/testlogs/webkit02.dat.log
src/pkg/exp/html/token.go
src/pkg/exp/html/token_test.go

index fd0ae38eb8269f64a8a08e609a3685bdcd03038d..72c04376a34fcf0966c493ff2fd6447ff97cea35 100644 (file)
@@ -1,7 +1,7 @@
 PASS "<foo bar=qux/>"
 PASS "<p id=\"status\"><noscript><strong>A</strong></noscript><span>B</span></p>"
 PASS "<div><sarcasm><div></div></sarcasm></div>"
-FAIL "<html><body><img src=\"\" border=\"0\" alt=\"><div>A</div></body></html>"
+PASS "<html><body><img src=\"\" border=\"0\" alt=\"><div>A</div></body></html>"
 PASS "<table><td></tbody>A"
 PASS "<table><td></thead>A"
 PASS "<table><td></tfoot>A"
index 38d8024e0e11b16c4cd5a3e3a2eaaead053eeecd..7bc77329d8d091992e3e407aa885714252469884 100644 (file)
@@ -692,7 +692,7 @@ loop:
 // been consumed, where 'a' means anything in [A-Za-z].
 func (z *Tokenizer) readStartTag() TokenType {
        z.readTag(true)
-       if z.err != nil && len(z.attr) == 0 {
+       if z.err != nil {
                return ErrorToken
        }
        // Several tags flag the tokenizer's next token as raw.
@@ -948,7 +948,11 @@ loop:
                        }
                        if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
                                z.readTag(false)
-                               z.tt = EndTagToken
+                               if z.err != nil {
+                                       z.tt = ErrorToken
+                               } else {
+                                       z.tt = EndTagToken
+                               }
                                return z.tt
                        }
                        z.raw.end--
index 5209d139bb99549c0a8a2d8dbff61e590716c18c..0f873f0fb14474fa417b1bac35cf5209edcef4ee 100644 (file)
@@ -128,7 +128,7 @@ var tokenTests = []tokenTest{
        {
                "tag name eof #4",
                `<a x`,
-               `<a x="">`,
+               ``,
        },
        // Some malformed tags that are missing a '>'.
        {
@@ -144,12 +144,12 @@ var tokenTests = []tokenTest{
        {
                "malformed tag #2",
                `<p id`,
-               `<p id="">`,
+               ``,
        },
        {
                "malformed tag #3",
                `<p id=`,
-               `<p id="">`,
+               ``,
        },
        {
                "malformed tag #4",
@@ -159,7 +159,7 @@ var tokenTests = []tokenTest{
        {
                "malformed tag #5",
                `<p id=0`,
-               `<p id="0">`,
+               ``,
        },
        {
                "malformed tag #6",
@@ -169,13 +169,18 @@ var tokenTests = []tokenTest{
        {
                "malformed tag #7",
                `<p id="0</p>`,
-               `<p id="0&lt;/p&gt;">`,
+               ``,
        },
        {
                "malformed tag #8",
                `<p id="0"</p>`,
                `<p id="0" <="" p="">`,
        },
+       {
+               "malformed tag #9",
+               `<p></p id`,
+               `<p>`,
+       },
        // Raw text and RCDATA.
        {
                "basic raw text",
@@ -205,7 +210,7 @@ var tokenTests = []tokenTest{
        {
                "' ' completes script end tag",
                "<SCRIPT>a</SCRipt ",
-               "<script>$a$</script>",
+               "<script>$a",
        },
        {
                "'>' completes script end tag",