From aa9a81b1b098f3482bd648fbb634756cfa403fd5 Mon Sep 17 00:00:00 2001 From: Andrew Balholm Date: Mon, 13 Aug 2012 12:07:44 +1000 Subject: [PATCH] exp/html: discard tags that are terminated by EOF instead of by '>' If a tag doesn't have a closing '>', it isn't considered a tag; it is just ignored and EOF is returned instead. Pass one additional test in the test suite. Change tokenizer tests to match correct behavior. R=nigeltao CC=golang-dev https://golang.org/cl/6454131 --- src/pkg/exp/html/testlogs/webkit02.dat.log | 2 +- src/pkg/exp/html/token.go | 8 ++++++-- src/pkg/exp/html/token_test.go | 17 +++++++++++------ 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/pkg/exp/html/testlogs/webkit02.dat.log b/src/pkg/exp/html/testlogs/webkit02.dat.log index fd0ae38eb8..72c04376a3 100644 --- a/src/pkg/exp/html/testlogs/webkit02.dat.log +++ b/src/pkg/exp/html/testlogs/webkit02.dat.log @@ -1,7 +1,7 @@ PASS "" PASS "

B

" PASS "
" -FAIL "\"
A
" +PASS "\"
A
" PASS "A" PASS "
A" PASS "
A" diff --git a/src/pkg/exp/html/token.go b/src/pkg/exp/html/token.go index 38d8024e0e..7bc77329d8 100644 --- a/src/pkg/exp/html/token.go +++ b/src/pkg/exp/html/token.go @@ -692,7 +692,7 @@ loop: // been consumed, where 'a' means anything in [A-Za-z]. func (z *Tokenizer) readStartTag() TokenType { z.readTag(true) - if z.err != nil && len(z.attr) == 0 { + if z.err != nil { return ErrorToken } // Several tags flag the tokenizer's next token as raw. @@ -948,7 +948,11 @@ loop: } if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' { z.readTag(false) - z.tt = EndTagToken + if z.err != nil { + z.tt = ErrorToken + } else { + z.tt = EndTagToken + } return z.tt } z.raw.end-- diff --git a/src/pkg/exp/html/token_test.go b/src/pkg/exp/html/token_test.go index 5209d139bb..0f873f0fb1 100644 --- a/src/pkg/exp/html/token_test.go +++ b/src/pkg/exp/html/token_test.go @@ -128,7 +128,7 @@ var tokenTests = []tokenTest{ { "tag name eof #4", ``, + ``, }, // Some malformed tags that are missing a '>'. { @@ -144,12 +144,12 @@ var tokenTests = []tokenTest{ { "malformed tag #2", `

`, + ``, }, { "malformed tag #3", `

`, + ``, }, { "malformed tag #4", @@ -159,7 +159,7 @@ var tokenTests = []tokenTest{ { "malformed tag #5", `

`, + ``, }, { "malformed tag #6", @@ -169,13 +169,18 @@ var tokenTests = []tokenTest{ { "malformed tag #7", `

`, + ``, }, { "malformed tag #8", `

`, `

`, }, + { + "malformed tag #9", + `

`, + }, // Raw text and RCDATA. { "basic raw text", @@ -205,7 +210,7 @@ var tokenTests = []tokenTest{ { "' ' completes script end tag", "$a$", + "