From fe28d1aacf108cb7b7a4ec573a019e193d07c696 Mon Sep 17 00:00:00 2001 From: Nigel Tao Date: Wed, 21 Dec 2011 10:00:41 +1100 Subject: [PATCH] html: handle breakout tags in foreign content. Also recognize that, in the latest version of the HTML5 spec, foreign content is not an insertion mode, but a separate concern. Pass tests10.dat, test 13:
foobar

baz

quux | | | | | |
| | | "foo" | | "bar" |

| "baz" |

| "quux" Also pass tests through test 15: foobar

baz

quux R=andybalholm CC=golang-dev https://golang.org/cl/5494078 --- src/pkg/html/parse.go | 41 ++++++++++++++++++++++++++++---------- src/pkg/html/parse_test.go | 2 +- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/pkg/html/parse.go b/src/pkg/html/parse.go index 6892d8fefa..67356e450c 100644 --- a/src/pkg/html/parse.go +++ b/src/pkg/html/parse.go @@ -319,10 +319,7 @@ func (p *parser) resetInsertionMode() { case "html": p.im = beforeHeadIM default: - if p.top().Namespace == "" { - continue - } - p.im = inForeignContentIM + continue } return } @@ -814,7 +811,6 @@ func inBodyIM(p *parser) bool { // TODO: adjust foreign attributes. p.addElement(p.tok.Data, p.tok.Attr) p.top().Namespace = namespace - p.im = inForeignContentIM return true case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr": // Ignore the token. @@ -1590,7 +1586,7 @@ func afterAfterFramesetIM(p *parser) bool { } // Section 12.2.5.5. -func inForeignContentIM(p *parser) bool { +func parseForeignContent(p *parser) bool { switch p.tok.Type { case TextToken: // TODO: HTML integration points. @@ -1610,7 +1606,14 @@ func inForeignContentIM(p *parser) bool { }) case StartTagToken: if breakout[p.tok.Data] { - // TODO. + for i := len(p.oe) - 1; i >= 0; i-- { + // TODO: HTML, MathML integration points. + if p.oe[i].Namespace == "" { + p.oe = p.oe[:i+1] + break + } + } + return false } switch p.top().Namespace { case "mathml": @@ -1626,15 +1629,13 @@ func inForeignContentIM(p *parser) bool { case EndTagToken: for i := len(p.oe) - 1; i >= 0; i-- { if p.oe[i].Namespace == "" { - inBodyIM(p) - break + return p.im(p) } if strings.EqualFold(p.oe[i].Data, p.tok.Data) { p.oe = p.oe[:i] break } } - p.resetInsertionMode() return true default: // Ignore the token. @@ -1642,6 +1643,20 @@ func inForeignContentIM(p *parser) bool { return true } +// Section 12.2.5. +func (p *parser) inForeignContent() bool { + if len(p.oe) == 0 { + return false + } + n := p.oe[len(p.oe)-1] + if n.Namespace == "" { + return false + } + // TODO: MathML, HTML integration points. + // TODO: MathML's annotation-xml combining with SVG's svg. + return true +} + func (p *parser) parse() error { // Iterate until EOF. Any other error will cause an early return. consumed := true @@ -1654,7 +1669,11 @@ func (p *parser) parse() error { return err } } - consumed = p.im(p) + if p.inForeignContent() { + consumed = parseForeignContent(p) + } else { + consumed = p.im(p) + } } // Loop until the final token (the ErrorToken signifying EOF) is consumed. for { diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go index 7ca2922cc3..1c2df5a7ee 100644 --- a/src/pkg/html/parse_test.go +++ b/src/pkg/html/parse_test.go @@ -173,7 +173,7 @@ func TestParser(t *testing.T) { {"tests4.dat", -1}, {"tests5.dat", -1}, {"tests6.dat", 45}, - {"tests10.dat", 13}, + {"tests10.dat", 16}, } for _, tf := range testFiles { f, err := os.Open("testdata/webkit/" + tf.filename) -- 2.48.1