Cypherpunks repositories - gostls13.git/commitdiff
html: handle breakout tags in foreign content.
author Nigel Tao <nigeltao@golang.org>
Tue, 20 Dec 2011 23:00:41 +0000 (10:00 +1100)
committer Nigel Tao <nigeltao@golang.org>
Tue, 20 Dec 2011 23:00:41 +0000 (10:00 +1100)
Also recognize that, in the latest version of the HTML5 spec,
foreign content is not an insertion mode, but a separate concern.

Pass tests10.dat, test 13:
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g><p>baz</table><p>quux

| <!DOCTYPE html>
| <html>
|   <head>
|   <body>
|     <table>
|       <caption>
|         <svg svg>
|           <svg g>
|             "foo"
|           <svg g>
|             "bar"
|         <p>
|           "baz"
|     <p>
|       "quux"

Also pass tests through test 15:
<!DOCTYPE html><body><table><colgroup><svg><g>foo</g><g>bar</g><p>baz</table><p>quux

R=andybalholm
CC=golang-dev
https://golang.org/cl/5494078

src/pkg/html/parse.go
src/pkg/html/parse_test.go

index 6892d8fefa04ec122aca134085c119e5cd31a663..67356e450c490f1079736669bd6a3869638f448a 100644 (file)
@@ -319,10 +319,7 @@ func (p *parser) resetInsertionMode() {
                case "html":
                        p.im = beforeHeadIM
                default:
-                       if p.top().Namespace == "" {
-                               continue
-                       }
-                       p.im = inForeignContentIM
+                       continue
                }
                return
        }
@@ -814,7 +811,6 @@ func inBodyIM(p *parser) bool {
                        // TODO: adjust foreign attributes.
                        p.addElement(p.tok.Data, p.tok.Attr)
                        p.top().Namespace = namespace
-                       p.im = inForeignContentIM
                        return true
                case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr":
                        // Ignore the token.
@@ -1590,7 +1586,7 @@ func afterAfterFramesetIM(p *parser) bool {
 }
 
 // Section 12.2.5.5.
-func inForeignContentIM(p *parser) bool {
+func parseForeignContent(p *parser) bool {
        switch p.tok.Type {
        case TextToken:
                // TODO: HTML integration points.
@@ -1610,7 +1606,14 @@ func inForeignContentIM(p *parser) bool {
                })
        case StartTagToken:
                if breakout[p.tok.Data] {
-                       // TODO.
+                       for i := len(p.oe) - 1; i >= 0; i-- {
+                               // TODO: HTML, MathML integration points.
+                               if p.oe[i].Namespace == "" {
+                                       p.oe = p.oe[:i+1]
+                                       break
+                               }
+                       }
+                       return false
                }
                switch p.top().Namespace {
                case "mathml":
@@ -1626,15 +1629,13 @@ func inForeignContentIM(p *parser) bool {
        case EndTagToken:
                for i := len(p.oe) - 1; i >= 0; i-- {
                        if p.oe[i].Namespace == "" {
-                               inBodyIM(p)
-                               break
+                               return p.im(p)
                        }
                        if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
                                p.oe = p.oe[:i]
                                break
                        }
                }
-               p.resetInsertionMode()
                return true
        default:
                // Ignore the token.
@@ -1642,6 +1643,20 @@ func inForeignContentIM(p *parser) bool {
        return true
 }
 
+// Section 12.2.5.
+func (p *parser) inForeignContent() bool {
+       if len(p.oe) == 0 {
+               return false
+       }
+       n := p.oe[len(p.oe)-1]
+       if n.Namespace == "" {
+               return false
+       }
+       // TODO: MathML, HTML integration points.
+       // TODO: MathML's annotation-xml combining with SVG's svg.
+       return true
+}
+
 func (p *parser) parse() error {
        // Iterate until EOF. Any other error will cause an early return.
        consumed := true
@@ -1654,7 +1669,11 @@ func (p *parser) parse() error {
                                return err
                        }
                }
-               consumed = p.im(p)
+               if p.inForeignContent() {
+                       consumed = parseForeignContent(p)
+               } else {
+                       consumed = p.im(p)
+               }
        }
        // Loop until the final token (the ErrorToken signifying EOF) is consumed.
        for {
index 7ca2922cc31b574fe6befb2851038f69a1a517ae..1c2df5a7ee37899e5b245579f63504b730c05b15 100644 (file)
@@ -173,7 +173,7 @@ func TestParser(t *testing.T) {
                {"tests4.dat", -1},
                {"tests5.dat", -1},
                {"tests6.dat", 45},
-               {"tests10.dat", 13},
+               {"tests10.dat", 16},
        }
        for _, tf := range testFiles {
                f, err := os.Open("testdata/webkit/" + tf.filename)