Attr []Attribute
}
-// An insertion mode (section 10.2.3.1) is the state transition function from
-// a particular state in the HTML5 parser's state machine. In addition to
-// returning the next state, it also returns whether the token was consumed.
-type insertionMode func(*parser) (insertionMode, bool)
-
// A parser implements the HTML5 parsing algorithm:
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#tree-construction
type parser struct {
p.tok.Attr = nil
return nil
}
- if tokenType := p.tokenizer.Next(); tokenType == ErrorToken {
- return p.tokenizer.Error()
- }
+ p.tokenizer.Next()
p.tok = p.tokenizer.Token()
- if p.tok.Type == SelfClosingTagToken {
+ switch p.tok.Type {
+ case ErrorToken:
+ return p.tokenizer.Error()
+ case SelfClosingTagToken:
p.hasSelfClosingToken = true
p.tok.Type = StartTagToken
}
p.hasSelfClosingToken = false
}
+// An insertion mode (section 10.2.3.1) is the state transition function from
+// a particular state in the HTML5 parser's state machine. It updates the
+// parser's fields depending on parser.tok (where ErrorToken means EOF). In
+// addition to returning the next insertionMode state, it also returns whether
+// the token was consumed.
+type insertionMode func(*parser) (insertionMode, bool)
+
// Section 10.2.5.4.
func initialInsertionMode(p *parser) (insertionMode, bool) {
// TODO(nigeltao): check p.tok for DOCTYPE.
implied bool
)
switch p.tok.Type {
+ case ErrorToken:
+ implied = true
case TextToken:
// TODO(nigeltao): distinguish whitespace text from others.
implied = true
implied = true
}
case EndTagToken:
- // TODO.
+ switch p.tok.Data {
+ case "head", "body", "html", "br":
+ implied = true
+ default:
+ // Ignore the token.
+ }
}
if add || implied {
p.addChild(&Node{
implied bool
)
switch p.tok.Type {
+ case ErrorToken:
+ implied = true
case TextToken:
// TODO(nigeltao): distinguish whitespace text from others.
implied = true
add = true
attr = p.tok.Attr
case "html":
- // TODO.
+ return inBodyInsertionMode, false
default:
implied = true
}
case EndTagToken:
- // TODO.
+ switch p.tok.Data {
+ case "head", "body", "html", "br":
+ implied = true
+ default:
+ // Ignore the token.
+ }
}
if add || implied {
p.addChild(&Node{
implied bool
)
switch p.tok.Type {
- case TextToken:
+ case ErrorToken, TextToken:
implied = true
case StartTagToken:
switch p.tok.Data {
implied bool
)
switch p.tok.Type {
- case TextToken:
+ case ErrorToken, TextToken:
implied = true
framesetOK = true
case StartTagToken:
func inBodyInsertionMode(p *parser) (insertionMode, bool) {
var endP bool
switch p.tok.Type {
+ case ErrorToken:
+ // No-op.
case TextToken:
p.addText(p.tok.Data)
p.framesetOK = false
// Section 10.2.5.22.
func afterBodyInsertionMode(p *parser) (insertionMode, bool) {
switch p.tok.Type {
+ case ErrorToken:
+ // TODO.
case TextToken:
// TODO.
case StartTagToken:
scripting: true,
framesetOK: true,
}
+ // Iterate until EOF. Any other error will cause an early return.
im, consumed := initialInsertionMode, true
for {
if consumed {
}
im, consumed = im(p)
}
- // TODO(nigeltao): clean up, depending on the value of im.
- // The specification's algorithm does clean up on reading an EOF 'token',
- // but in go we represent EOF by an os.Error instead.
+ // Loop until the final token (the ErrorToken signifying EOF) is consumed.
+ for {
+ if im, consumed = im(p); consumed {
+ break
+ }
+ }
return p.doc, nil
}
if n == nil || len(n.Child) == 0 {
return "", nil
}
- if len(n.Child) > 1 {
- return "too many children", nil
- }
b := bytes.NewBuffer(nil)
- if err := dumpLevel(b, n.Child[0], 0); err != nil {
- return "", err
+ for _, child := range n.Child {
+ if err := dumpLevel(b, child, 0); err != nil {
+ return "", err
+ }
}
return b.String(), nil
}
for _, filename := range filenames {
rc := make(chan io.Reader)
go readDat(filename, rc)
- // TODO(nigeltao): Process all test cases, not just the first three.
- for i := 0; i < 3; i++ {
+ // TODO(nigeltao): Process all test cases, not just a subset.
+ for i := 0; i < 19; i++ {
// Parse the #data section.
doc, err := Parse(<-rc)
if err != nil {