--- /dev/null
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+// Section 12.2.5.5. breakout is the set of tag names that inForeignContentIM looks up for each start tag token (presumably the spec's list of HTML start tags that break out of foreign content; confirm against the spec).
+var breakout = map[string]bool{
+ "b": true,
+ "big": true,
+ "blockquote": true,
+ "body": true,
+ "br": true,
+ "center": true,
+ "code": true,
+ "dd": true,
+ "div": true,
+ "dl": true,
+ "dt": true,
+ "em": true,
+ "embed": true,
+ "font": true,
+ "h1": true,
+ "h2": true,
+ "h3": true,
+ "h4": true,
+ "h5": true,
+ "h6": true,
+ "head": true,
+ "hr": true,
+ "i": true,
+ "img": true,
+ "li": true,
+ "listing": true,
+ "menu": true,
+ "meta": true,
+ "nobr": true,
+ "ol": true,
+ "p": true,
+ "pre": true,
+ "ruby": true,
+ "s": true,
+ "small": true,
+ "span": true,
+ "strong": true,
+ "strike": true,
+ "sub": true,
+ "sup": true,
+ "table": true,
+ "tt": true,
+ "u": true,
+ "ul": true,
+ "var": true,
+}
+
+// TODO: add look-up tables for MathML and SVG adjustments.
// A Node consists of a NodeType and some Data (tag name for element nodes,
// content for text) and are part of a tree of Nodes. Element nodes may also
-// contain a slice of Attributes. Data is unescaped, so that it looks like
-// "a<b" rather than "a&lt;b".
+// have a Namespace and contain a slice of Attributes. Data is unescaped, so
+// that it looks like "a<b" rather than "a&lt;b".
type Node struct {
- Parent *Node
- Child []*Node
- Type NodeType
- Data string
- Attr []Attribute
+ Parent *Node
+ Child []*Node
+ Type NodeType
+ Data string
+ Namespace string // "" is treated as "no namespace"; this change assigns "mathml" or "svg" to foreign elements
+ Attr []Attribute
}
// Add adds a node as a child of n.
// addElement calls addChild with an element node.
func (p *parser) addElement(tag string, attr []Attribute) {
p.addChild(&Node{
- Type: ElementNode,
- Data: tag,
- Attr: attr,
+ Type: ElementNode,
+ Data: tag,
+ Namespace: p.top().Namespace, // start with the Namespace of the node p.top() returns; the "math"/"svg" case overwrites it after this call
+ Attr: attr,
})
}
case "html":
p.im = beforeHeadIM
default:
- continue
+ if p.top().Namespace == "" {
+ continue
+ }
+ p.im = inForeignContentIM
}
return
}
p.reconstructActiveFormattingElements()
p.framesetOK = false
p.addElement(p.tok.Data, p.tok.Attr)
+ case "math", "svg":
+ p.reconstructActiveFormattingElements()
+ namespace := ""
+ if p.tok.Data == "math" {
+ // TODO: adjust MathML attributes.
+ namespace = "mathml"
+ } else {
+ // TODO: adjust SVG attributes.
+ namespace = "svg"
+ }
+ // TODO: adjust foreign attributes.
+ p.addElement(p.tok.Data, p.tok.Attr)
+ p.top().Namespace = namespace
+ p.im = inForeignContentIM
+ return true
case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr":
// Ignore the token.
default:
return true
}
+// TODO: fix up the other IM's section numbers to match the latest spec.
+
+// Section 12.2.5.5. inForeignContentIM is the insertion mode the parser switches to when a "math" or "svg" element is added or when p.top() has a non-empty Namespace; most branches are still TODO.
+func inForeignContentIM(p *parser) bool {
+ switch p.tok.Type {
+ case CommentToken: // pass the comment text to addChild as a CommentNode
+ p.addChild(&Node{
+ Type: CommentNode,
+ Data: p.tok.Data,
+ })
+ case StartTagToken:
+ if breakout[p.tok.Data] { // breakout tags get no special handling yet, so they continue into the code below like any other start tag
+ // TODO.
+ }
+ switch p.top().Namespace {
+ case "mathml":
+ // TODO: adjust MathML attributes.
+ case "svg":
+ // TODO: adjust SVG tag names.
+ // TODO: adjust SVG attributes.
+ default: // reached when p.top().Namespace is neither "mathml" nor "svg"
+ panic("html: bad parser state: unexpected namespace")
+ }
+ // TODO: adjust foreign attributes.
+ p.addElement(p.tok.Data, p.tok.Attr) // addElement copies p.top().Namespace onto the new element
+ case EndTagToken: // end tags are currently dropped: this branch does nothing and p.im is left unchanged
+ // TODO.
+ default:
+ // Ignore the token.
+ }
+ return true // the same result for every token type; presumably true means the token was consumed — confirm against parse
+}
+
func (p *parser) parse() error {
// Iterate until EOF. Any other error will cause an early return.
consumed := true
case DocumentNode:
return errors.New("unexpected DocumentNode")
case ElementNode:
- fmt.Fprintf(w, "<%s>", n.Data)
+ if n.Namespace != "" {
+ fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data)
+ } else {
+ fmt.Fprintf(w, "<%s>", n.Data)
+ }
for _, a := range n.Attr {
io.WriteString(w, "\n")
dumpIndent(w, level+1)
n int
}{
// TODO(nigeltao): Process all the test cases from all the .dat files.
+ {"adoption01.dat", -1},
{"doctype01.dat", -1},
{"tests1.dat", -1},
{"tests2.dat", -1},