From 3df0512469e98361b94e6107d6d12842f7c545b4 Mon Sep 17 00:00:00 2001 From: Andrew Balholm Date: Sat, 12 Nov 2011 12:23:30 +1100 Subject: [PATCH] html: handle end tags in strange places Pass tests1.dat, test 111:

| | | |
|

Also pass all the remaining tests in tests1.dat. R=nigeltao CC=golang-dev https://golang.org/cl/5372066 --- src/pkg/html/parse.go | 47 ++++++++++++++------------------------ src/pkg/html/parse_test.go | 2 +- 2 files changed, 18 insertions(+), 31 deletions(-) diff --git a/src/pkg/html/parse.go b/src/pkg/html/parse.go index e609cce129..9dd5a4091c 100644 --- a/src/pkg/html/parse.go +++ b/src/pkg/html/parse.go @@ -352,30 +352,19 @@ func initialIM(p *parser) (insertionMode, bool) { // Section 11.2.5.4.2. func beforeHTMLIM(p *parser) (insertionMode, bool) { - var ( - add bool - attr []Attribute - implied bool - ) switch p.tok.Type { - case ErrorToken: - implied = true - case TextToken: - // TODO: distinguish whitespace text from others. - implied = true case StartTagToken: if p.tok.Data == "html" { - add = true - attr = p.tok.Attr - } else { - implied = true + p.addElement(p.tok.Data, p.tok.Attr) + return beforeHeadIM, true } case EndTagToken: switch p.tok.Data { case "head", "body", "html", "br": - implied = true + // Drop down to creating an implied tag. default: // Ignore the token. + return beforeHTMLIM, true } case CommentToken: p.doc.Add(&Node{ @@ -384,10 +373,9 @@ func beforeHTMLIM(p *parser) (insertionMode, bool) { }) return beforeHTMLIM, true } - if add || implied { - p.addElement("html", attr) - } - return beforeHeadIM, !implied + // Create an implied tag. + p.addElement("html", nil) + return beforeHeadIM, false } // Section 11.2.5.4.3. @@ -691,6 +679,9 @@ func inBodyIM(p *parser) (insertionMode, bool) { if p.popUntil(defaultScopeStopTags, p.tok.Data) { p.clearActiveFormattingElements() } + case "br": + p.tok.Type = StartTagToken + return inBodyIM, false default: p.inBodyEndTagOther(p.tok.Data) } @@ -1192,18 +1183,15 @@ func inSelectIM(p *parser) (insertionMode, bool) { func afterBodyIM(p *parser) (insertionMode, bool) { switch p.tok.Type { case ErrorToken: - // TODO. - case TextToken: - // TODO. + // Stop parsing. + return nil, true case StartTagToken: - // TODO. + if p.tok.Data == "html" { + return useTheRulesFor(p, afterBodyIM, inBodyIM) + } case EndTagToken: - switch p.tok.Data { - case "html": - // TODO: autoclose the stack of open elements. + if p.tok.Data == "html" { return afterAfterBodyIM, true - default: - // TODO. } case CommentToken: // The comment is attached to the element. @@ -1216,8 +1204,7 @@ func afterBodyIM(p *parser) (insertionMode, bool) { }) return afterBodyIM, true } - // TODO: should this be "return inBodyIM, true"? - return afterBodyIM, true + return inBodyIM, false } // Section 11.2.5.4.19. diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go index d783ee32c9..13c50a99bc 100644 --- a/src/pkg/html/parse_test.go +++ b/src/pkg/html/parse_test.go @@ -133,7 +133,7 @@ func TestParser(t *testing.T) { n int }{ // TODO(nigeltao): Process all the test cases from all the .dat files. - {"tests1.dat", 111}, + {"tests1.dat", -1}, {"tests2.dat", 0}, {"tests3.dat", 0}, } -- 2.50.0