From: Andrew Balholm <andybalholm@gmail.com>
Date: Tue, 31 Jul 2012 23:35:02 +0000 (+1000)
Subject: exp/html: tokenize attributes of end tags
X-Git-Tag: go1.1rc2~2730
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=9f3b00579eca946337d486776797b78aaf3bc55b;p=gostls13.git

exp/html: tokenize attributes of end tags

If an end tag has an attribute that is a quoted string containing '>',
the tokenizer would end the tag prematurely. Now it reads the attributes
on end tags just as it does on start tags, but the high-level interface
still doesn't return them, because their presence is a parse error.

Pass 1 additional test.

R=nigeltao
CC=golang-dev
https://golang.org/cl/6457060
---

diff --git a/src/pkg/exp/html/testlogs/scriptdata01.dat.log b/src/pkg/exp/html/testlogs/scriptdata01.dat.log
index d5c9d6e331..85b9284d51 100644
--- a/src/pkg/exp/html/testlogs/scriptdata01.dat.log
+++ b/src/pkg/exp/html/testlogs/scriptdata01.dat.log
@@ -4,7 +4,7 @@
 PASS "FOOBAR"
 PASS "FOOBAR"
 PASS "FOOBAR"
-FAIL "FOO<script></script foo=\">\" dd>BAR"
+PASS "FOO<script></script foo=\">\" dd>BAR"
 PASS "FOOBAR"
 PASS "FOOBAR"
 PASS "FOOBAR"
diff --git a/src/pkg/exp/html/token.go b/src/pkg/exp/html/token.go
index d4867fc173..7e431c21ef 100644
--- a/src/pkg/exp/html/token.go
+++ b/src/pkg/exp/html/token.go
@@ -468,29 +468,10 @@ loop:
 // readStartTag reads the next start tag token. The opening "<a" has already
 // been consumed, where 'a' means anything in [A-Za-z].
 func (z *Tokenizer) readStartTag() TokenType {
-	z.attr = z.attr[:0]
-	z.nAttrReturned = 0
-	// Read the tag name and attribute key/value pairs.
-	z.readTagName()
-	if z.skipWhiteSpace(); z.err != nil {
+	z.readTag()
+	if z.err != nil {
 		return ErrorToken
 	}
-	for {
-		c := z.readByte()
-		if z.err != nil || c == '>' {
-			break
-		}
-		z.raw.end--
-		z.readTagAttrKey()
-		z.readTagAttrVal()
-		// Save pendingAttr if it has a non-empty key.
-		if z.pendingAttr[0].start != z.pendingAttr[0].end {
-			z.attr = append(z.attr, z.pendingAttr)
-		}
-		if z.skipWhiteSpace(); z.err != nil {
-			break
-		}
-	}
 	// Several tags flag the tokenizer's next token as raw.
 	c, raw := z.buf[z.data.start], false
 	if 'A' <= c && c <= 'Z' {
@@ -520,16 +501,30 @@ func (z *Tokenizer) readStartTag() TokenType {
 	return StartTagToken
 }
 
-// readEndTag reads the next end tag token. The opening "</a" has already
+// readTag reads the next tag token. The opening "<a" or "</a" has already
 // been consumed, where 'a' means anything in [A-Za-z].
-func (z *Tokenizer) readEndTag() {
+func (z *Tokenizer) readTag() {
 	z.attr = z.attr[:0]
 	z.nAttrReturned = 0
+	// Read the tag name and attribute key/value pairs.
 	z.readTagName()
+	if z.skipWhiteSpace(); z.err != nil {
+		return
+	}
 	for {
 		c := z.readByte()
 		if z.err != nil || c == '>' {
-			return
+			break
+		}
+		z.raw.end--
+		z.readTagAttrKey()
+		z.readTagAttrVal()
+		// Save pendingAttr if it has a non-empty key.
+		if z.pendingAttr[0].start != z.pendingAttr[0].end {
+			z.attr = append(z.attr, z.pendingAttr)
+		}
+		if z.skipWhiteSpace(); z.err != nil {
+			break
 		}
 	}
 }
@@ -727,7 +722,7 @@ loop:
 				continue loop
 			}
 			if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
-				z.readEndTag()
+				z.readTag()
 				z.tt = EndTagToken
 				return z.tt
 			}
@@ -858,22 +853,18 @@ func (z *Tokenizer) Token() Token {
 	switch z.tt {
 	case TextToken, CommentToken, DoctypeToken:
 		t.Data = string(z.Text())
-	case StartTagToken, SelfClosingTagToken:
-		var attr []Attribute
+	case StartTagToken, SelfClosingTagToken, EndTagToken:
 		name, moreAttr := z.TagName()
-		for moreAttr {
-			var key, val []byte
-			key, val, moreAttr = z.TagAttr()
-			attr = append(attr, Attribute{"", atom.String(key), string(val)})
-		}
-		if a := atom.Lookup(name); a != 0 {
-			t.DataAtom, t.Data = a, a.String()
-		} else {
-			t.DataAtom, t.Data = 0, string(name)
+		// Since end tags should not have attributes, the high-level tokenizer
+		// interface will not return attributes for an end tag token even if
+		// it looks like </br foo="bar">.
+		if z.tt != EndTagToken {
+			for moreAttr {
+				var key, val []byte
+				key, val, moreAttr = z.TagAttr()
+				t.Attr = append(t.Attr, Attribute{"", atom.String(key), string(val)})
+			}
 		}
-		t.Attr = attr
-	case EndTagToken:
-		name, _ := z.TagName()
 		if a := atom.Lookup(name); a != 0 {
 			t.DataAtom, t.Data = a, a.String()
 		} else {
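
Below is a minimal sketch (not part of the CL) of the behavior the commit message describes. It uses the successor package golang.org/x/net/html, whose Tokenizer API matches exp/html for this purpose; the import path and the sample input are assumptions rather than anything taken from the patch. The input closes a script element with an end tag whose quoted attribute value contains '>'; after this change the tokenizer only ends that tag at the final '>', while Token() still drops the attributes because attributes on an end tag are a parse error.

package main

import (
	"fmt"
	"strings"

	"golang.org/x/net/html"
)

func main() {
	// An end tag carrying a quoted attribute value that contains '>'.
	// (Hypothetical sample input, chosen to mirror the scriptdata01 case.)
	const input = `FOO<script></script foo=">" dd>BAR`

	z := html.NewTokenizer(strings.NewReader(input))
	for {
		tt := z.Next()
		if tt == html.ErrorToken {
			// Tokenization stops at io.EOF or a read error.
			return
		}
		t := z.Token()
		// For the end tag, t.Attr stays empty even though the raw input
		// carried foo=">" and dd: the high-level interface drops them.
		fmt.Printf("%-15v data=%q attrs=%v\n", tt, t.Data, t.Attr)
	}
}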