Cypherpunks repositories - gostls13.git/commitdiff
html: a first step at parsing foreign content (MathML, SVG).
author Nigel Tao <nigeltao@golang.org>
Tue, 13 Dec 2011 02:52:47 +0000 (13:52 +1100)
committer Nigel Tao <nigeltao@golang.org>
Tue, 13 Dec 2011 02:52:47 +0000 (13:52 +1100)
Nodes now have a Namespace field.

Pass adoption01.dat, test 12:
<a><svg><tr><input></a>

| <html>
|   <head>
|   <body>
|     <a>
|       <svg svg>
|         <svg tr>
|           <svg input>

The other adoption01.dat tests already passed.

R=andybalholm
CC=golang-dev
https://golang.org/cl/5467075

src/pkg/html/Makefile
src/pkg/html/foreign.go [new file with mode: 0644]
src/pkg/html/node.go
src/pkg/html/parse.go
src/pkg/html/parse_test.go

index 3c3de8ee310921b19f33e7dc7964468581355e21..da5c3f2a3e510690dacdcfa39e16c03611f2e168 100644 (file)
@@ -11,6 +11,7 @@ GOFILES=\
        doctype.go\
        entity.go\
        escape.go\
+       foreign.go\
        node.go\
        parse.go\
        render.go\
diff --git a/src/pkg/html/foreign.go b/src/pkg/html/foreign.go
new file mode 100644 (file)
index 0000000..0f9b4ad
--- /dev/null
@@ -0,0 +1,56 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+// Section 12.2.5.5.
+var breakout = map[string]bool{
+       "b":          true,
+       "big":        true,
+       "blockquote": true,
+       "body":       true,
+       "br":         true,
+       "center":     true,
+       "code":       true,
+       "dd":         true,
+       "div":        true,
+       "dl":         true,
+       "dt":         true,
+       "em":         true,
+       "embed":      true,
+       "font":       true,
+       "h1":         true,
+       "h2":         true,
+       "h3":         true,
+       "h4":         true,
+       "h5":         true,
+       "h6":         true,
+       "head":       true,
+       "hr":         true,
+       "i":          true,
+       "img":        true,
+       "li":         true,
+       "listing":    true,
+       "menu":       true,
+       "meta":       true,
+       "nobr":       true,
+       "ol":         true,
+       "p":          true,
+       "pre":        true,
+       "ruby":       true,
+       "s":          true,
+       "small":      true,
+       "span":       true,
+       "strong":     true,
+       "strike":     true,
+       "sub":        true,
+       "sup":        true,
+       "table":      true,
+       "tt":         true,
+       "u":          true,
+       "ul":         true,
+       "var":        true,
+}
+
+// TODO: add look-up tables for MathML and SVG adjustments.
index 5ca6035c118aac8051a8baf8dd34926d39106d6d..b0d42cece04c9a895f643aac0294510149be6928 100644 (file)
@@ -24,14 +24,15 @@ var scopeMarker = Node{Type: scopeMarkerNode}
 
 // A Node consists of a NodeType and some Data (tag name for element nodes,
 // content for text) and are part of a tree of Nodes. Element nodes may also
-// contain a slice of Attributes. Data is unescaped, so that it looks like
-// "a<b" rather than "a&lt;b".
+// have a Namespace and contain a slice of Attributes. Data is unescaped, so
+// that it looks like "a<b" rather than "a&lt;b".
 type Node struct {
-       Parent *Node
-       Child  []*Node
-       Type   NodeType
-       Data   string
-       Attr   []Attribute
+       Parent    *Node
+       Child     []*Node
+       Type      NodeType
+       Data      string
+       Namespace string
+       Attr      []Attribute
 }
 
 // Add adds a node as a child of n.
index 24cb323a59be1b8c2c216e225125c8a2c2023b73..0fe3a99ba2ba5ea00e7236a230bca0aad762af6d 100644 (file)
@@ -192,9 +192,10 @@ func (p *parser) addText(text string) {
 // addElement calls addChild with an element node.
 func (p *parser) addElement(tag string, attr []Attribute) {
        p.addChild(&Node{
-               Type: ElementNode,
-               Data: tag,
-               Attr: attr,
+               Type:      ElementNode,
+               Data:      tag,
+               Namespace: p.top().Namespace,
+               Attr:      attr,
        })
 }
 
@@ -318,7 +319,10 @@ func (p *parser) resetInsertionMode() {
                case "html":
                        p.im = beforeHeadIM
                default:
-                       continue
+                       if p.top().Namespace == "" {
+                               continue
+                       }
+                       p.im = inForeignContentIM
                }
                return
        }
@@ -792,6 +796,21 @@ func inBodyIM(p *parser) bool {
                        p.reconstructActiveFormattingElements()
                        p.framesetOK = false
                        p.addElement(p.tok.Data, p.tok.Attr)
+               case "math", "svg":
+                       p.reconstructActiveFormattingElements()
+                       namespace := ""
+                       if p.tok.Data == "math" {
+                               // TODO: adjust MathML attributes.
+                               namespace = "mathml"
+                       } else {
+                               // TODO: adjust SVG attributes.
+                               namespace = "svg"
+                       }
+                       // TODO: adjust foreign attributes.
+                       p.addElement(p.tok.Data, p.tok.Attr)
+                       p.top().Namespace = namespace
+                       p.im = inForeignContentIM
+                       return true
                case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr":
                        // Ignore the token.
                default:
@@ -1557,6 +1576,39 @@ func afterAfterFramesetIM(p *parser) bool {
        return true
 }
 
+// TODO: fix up the other IM's section numbers to match the latest spec.
+
+// Section 12.2.5.5.
+func inForeignContentIM(p *parser) bool {
+       switch p.tok.Type {
+       case CommentToken:
+               p.addChild(&Node{
+                       Type: CommentNode,
+                       Data: p.tok.Data,
+               })
+       case StartTagToken:
+               if breakout[p.tok.Data] {
+                       // TODO.
+               }
+               switch p.top().Namespace {
+               case "mathml":
+                       // TODO: adjust MathML attributes.
+               case "svg":
+                       // TODO: adjust SVG tag names.
+                       // TODO: adjust SVG attributes.
+               default:
+                       panic("html: bad parser state: unexpected namespace")
+               }
+               // TODO: adjust foreign attributes.
+               p.addElement(p.tok.Data, p.tok.Attr)
+       case EndTagToken:
+               // TODO.
+       default:
+               // Ignore the token.
+       }
+       return true
+}
+
 func (p *parser) parse() error {
        // Iterate until EOF. Any other error will cause an early return.
        consumed := true
index 8f8787886cee67bbcd6206184357d6af1276cc79..0eba283b985a1e26d6d17508839d9ad954c27eb0 100644 (file)
@@ -98,7 +98,11 @@ func dumpLevel(w io.Writer, n *Node, level int) error {
        case DocumentNode:
                return errors.New("unexpected DocumentNode")
        case ElementNode:
-               fmt.Fprintf(w, "<%s>", n.Data)
+               if n.Namespace != "" {
+                       fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data)
+               } else {
+                       fmt.Fprintf(w, "<%s>", n.Data)
+               }
                for _, a := range n.Attr {
                        io.WriteString(w, "\n")
                        dumpIndent(w, level+1)
@@ -161,6 +165,7 @@ func TestParser(t *testing.T) {
                n int
        }{
                // TODO(nigeltao): Process all the test cases from all the .dat files.
+               {"adoption01.dat", -1},
                {"doctype01.dat", -1},
                {"tests1.dat", -1},
                {"tests2.dat", -1},