package html
+import (
+ "exp/html/atom"
+)
+
// A NodeType is the type of a Node.
-type NodeType int
+type NodeType uint32
const (
ErrorNode NodeType = iota
// A Node consists of a NodeType and some Data (tag name for element nodes,
// content for text) and are part of a tree of Nodes. Element nodes may also
// have a Namespace and contain a slice of Attributes. Data is unescaped, so
-// that it looks like "a<b" rather than "a&lt;b".
+// that it looks like "a<b" rather than "a&lt;b". For element nodes, DataAtom
+// is the atom for Data, or zero if Data is not a known tag name.
//
// An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace.
// Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and
Parent *Node
Child []*Node
Type NodeType
+ DataAtom atom.Atom
Data string
Namespace string
Attr []Attribute
// The clone has no parent and no children.
func (n *Node) clone() *Node {
m := &Node{
- Type: n.Type,
- Data: n.Data,
- Attr: make([]Attribute, len(n.Attr)),
+ Type: n.Type,
+ DataAtom: n.DataAtom,
+ Data: n.Data,
+ Attr: make([]Attribute, len(n.Attr)),
}
copy(m.Attr, n.Attr)
return m
package html
import (
+ a "exp/html/atom"
"io"
"strings"
)
// addElement builds an ElementNode whose Data is tag and whose Attr is attr,
// and hands it to p.addChild. The node's DataAtom is not populated here yet;
// see the TODO on the Data field.
func (p *parser) addElement(tag string, attr []Attribute) {
p.addChild(&Node{
Type: ElementNode,
- Data: tag,
+ Data: tag, // TODO: also set DataAtom.
Attr: attr,
})
}
continue
}
compareAttributes:
- for _, a := range n.Attr {
- for _, b := range attr {
- if a.Key == b.Key && a.Namespace == b.Namespace && a.Val == b.Val {
+ for _, t0 := range n.Attr {
+ for _, t1 := range attr {
+ if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
// Found a match for this attribute, continue with the next attribute.
continue compareAttributes
}
return
}
attr := map[string]string{}
- for _, a := range dst.Attr {
- attr[a.Key] = a.Val
+ for _, t := range dst.Attr {
+ attr[t.Key] = t.Val
}
- for _, a := range src.Attr {
- if _, ok := attr[a.Key]; !ok {
- dst.Attr = append(dst.Attr, a)
- attr[a.Key] = a.Val
+ for _, t := range src.Attr {
+ if _, ok := attr[t.Key]; !ok {
+ dst.Attr = append(dst.Attr, t)
+ attr[t.Key] = t.Val
}
}
}
p.oe.pop()
p.acknowledgeSelfClosingTag()
if p.tok.Data == "input" {
- for _, a := range p.tok.Attr {
- if a.Key == "type" {
- if strings.ToLower(a.Val) == "hidden" {
+ for _, t := range p.tok.Attr {
+ if t.Key == "type" {
+ if strings.ToLower(t.Val) == "hidden" {
// Skip setting framesetOK = false
return true
}
action := ""
prompt := "This is a searchable index. Enter search keywords: "
attr := []Attribute{{Key: "name", Val: "isindex"}}
- for _, a := range p.tok.Attr {
- switch a.Key {
+ for _, t := range p.tok.Attr {
+ switch t.Key {
case "action":
- action = a.Val
+ action = t.Val
case "name":
// Ignore the attribute.
case "prompt":
- prompt = a.Val
+ prompt = t.Val
default:
- attr = append(attr, a)
+ attr = append(attr, t)
}
}
p.acknowledgeSelfClosingTag()
case "style", "script":
return inHeadIM(p)
case "input":
- for _, a := range p.tok.Attr {
- if a.Key == "type" && strings.ToLower(a.Val) == "hidden" {
+ for _, t := range p.tok.Attr {
+ if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
p.addElement(p.tok.Data, p.tok.Attr)
p.oe.pop()
return true
// Adjust SVG tag names. The tokenizer lower-cases tag names, but
// SVG wants e.g. "foreignObject" with a capital second "O".
if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
+ p.tok.DataAtom = a.Lookup([]byte(x))
p.tok.Data = x
}
adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
realToken, selfClosing := p.tok, p.hasSelfClosingToken
p.tok = Token{
Type: t,
- Data: data,
+ Data: data, // TODO: also set DataAtom.
Attr: attr,
}
p.hasSelfClosingToken = false
root := &Node{
Type: ElementNode,
- Data: "html",
+ Data: "html", // TODO: also set DataAtom.
}
p.doc.Add(root)
p.oe = nodeStack{root}
)
// A TokenType is the type of a Token.
-type TokenType int
+type TokenType uint32
const (
// ErrorToken means that an error occurred during tokenization.
// A Token consists of a TokenType and some Data (tag name for start and end
// tags, content for text, comments and doctypes). A tag Token may also contain
// a slice of Attributes. Data is unescaped for all Tokens (it looks like "a<b"
-// rather than "a&lt;b").
+// rather than "a&lt;b"). For tag Tokens, DataAtom is the atom for Data, or
+// zero if Data is not a known tag name.
type Token struct {
- Type TokenType
- Data string
- Attr []Attribute
+ Type TokenType // the kind of token
+ DataAtom atom.Atom // for tag Tokens: the atom for Data, or zero if Data is not a known tag name
+ Data string // unescaped tag name (start/end tags) or content (text, comments, doctypes)
+ Attr []Attribute // attributes; may be present on tag Tokens
}
// tagString returns a string representation of a tag Token's Data and Attr.
key, val, moreAttr = z.TagAttr()
attr = append(attr, Attribute{"", atom.String(key), string(val)})
}
- t.Data = atom.String(name)
+ if a := atom.Lookup(name); a != 0 {
+ t.DataAtom, t.Data = a, a.String()
+ } else {
+ t.DataAtom, t.Data = 0, string(name)
+ }
t.Attr = attr
case EndTagToken:
name, _ := z.TagName()
- t.Data = atom.String(name)
+ if a := atom.Lookup(name); a != 0 {
+ t.DataAtom, t.Data = a, a.String()
+ } else {
+ t.DataAtom, t.Data = 0, string(name)
+ }
}
return t
}