From 9f3b00579eca946337d486776797b78aaf3bc55b Mon Sep 17 00:00:00 2001 From: Andrew Balholm Date: Wed, 1 Aug 2012 09:35:02 +1000 Subject: [PATCH] exp/html: tokenize attributes of end tags If an end tag has an attribute that is a quoted string containing '>', the tokenizer would end the tag prematurely. Now it reads the attributes on end tags just as it does on start tags, but the high-level interface still doesn't return them, because their presence is a parse error. Pass 1 additional test. R=nigeltao CC=golang-dev https://golang.org/cl/6457060 --- .../exp/html/testlogs/scriptdata01.dat.log | 2 +- src/pkg/exp/html/token.go | 71 ++++++++----------- 2 files changed, 32 insertions(+), 41 deletions(-) diff --git a/src/pkg/exp/html/testlogs/scriptdata01.dat.log b/src/pkg/exp/html/testlogs/scriptdata01.dat.log index d5c9d6e331..85b9284d51 100644 --- a/src/pkg/exp/html/testlogs/scriptdata01.dat.log +++ b/src/pkg/exp/html/testlogs/scriptdata01.dat.log @@ -4,7 +4,7 @@ PASS "FOOBAR" PASS "FOOBAR" PASS "FOOBAR" PASS "FOO\" dd>BAR" +PASS "FOO\" dd>BAR" PASS "FOOBAR" PASS "FOOBAR" PASS "FOOBAR" diff --git a/src/pkg/exp/html/token.go b/src/pkg/exp/html/token.go index d4867fc173..7e431c21ef 100644 --- a/src/pkg/exp/html/token.go +++ b/src/pkg/exp/html/token.go @@ -468,29 +468,10 @@ loop: // readStartTag reads the next start tag token. The opening "' { - break - } - z.raw.end-- - z.readTagAttrKey() - z.readTagAttrVal() - // Save pendingAttr if it has a non-empty key. - if z.pendingAttr[0].start != z.pendingAttr[0].end { - z.attr = append(z.attr, z.pendingAttr) - } - if z.skipWhiteSpace(); z.err != nil { - break - } - } // Several tags flag the tokenizer's next token as raw. c, raw := z.buf[z.data.start], false if 'A' <= c && c <= 'Z' { @@ -520,16 +501,30 @@ func (z *Tokenizer) readStartTag() TokenType { return StartTagToken } -// readEndTag reads the next end tag token. The opening "' { - return + break + } + z.raw.end-- + z.readTagAttrKey() + z.readTagAttrVal() + // Save pendingAttr if it has a non-empty key. + if z.pendingAttr[0].start != z.pendingAttr[0].end { + z.attr = append(z.attr, z.pendingAttr) + } + if z.skipWhiteSpace(); z.err != nil { + break } } } @@ -727,7 +722,7 @@ loop: continue loop } if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' { - z.readEndTag() + z.readTag() z.tt = EndTagToken return z.tt } @@ -858,22 +853,18 @@ func (z *Tokenizer) Token() Token { switch z.tt { case TextToken, CommentToken, DoctypeToken: t.Data = string(z.Text()) - case StartTagToken, SelfClosingTagToken: - var attr []Attribute + case StartTagToken, SelfClosingTagToken, EndTagToken: name, moreAttr := z.TagName() - for moreAttr { - var key, val []byte - key, val, moreAttr = z.TagAttr() - attr = append(attr, Attribute{"", atom.String(key), string(val)}) - } - if a := atom.Lookup(name); a != 0 { - t.DataAtom, t.Data = a, a.String() - } else { - t.DataAtom, t.Data = 0, string(name) + // Since end tags should not have attributes, the high-level tokenizer + // interface will not return attributes for an end tag token even if + // it looks like
. + if z.tt != EndTagToken { + for moreAttr { + var key, val []byte + key, val, moreAttr = z.TagAttr() + t.Attr = append(t.Attr, Attribute{"", atom.String(key), string(val)}) + } } - t.Attr = attr - case EndTagToken: - name, _ := z.TagName() if a := atom.Lookup(name); a != 0 { t.DataAtom, t.Data = a, a.String() } else { -- 2.48.1