Pass tests2.dat, test 59:
<!DOCTYPE <!DOCTYPE HTML>><!--<!--x-->-->
| <!DOCTYPE <!doctype>
| <html>
| <head>
| <body>
| ">"
| <!-- <!--x -->
| "-->"
Pass all the tests in doctype01.dat.
Also pass tests2.dat, test 60:
<!doctype html><div><form></form><div></div></div>
R=nigeltao
CC=golang-dev
https://golang.org/cl/5437045
const whitespace = " \t\r\n\f"
+// parseDoctype splits the data of a DoctypeToken into the doctype name and
+// its optional public and system identifiers. The returned Node has Type
+// DoctypeNode and the lower-cased name as its Data; every identifier that was
+// found is recorded as an attribute whose key is "public" or "system".
+func parseDoctype(s string) *Node {
+	// The name runs up to the first whitespace character, or to the end of
+	// the input when there is none.
+	nameEnd := strings.IndexAny(s, whitespace)
+	if nameEnd < 0 {
+		nameEnd = len(s)
+	}
+	doctype := &Node{
+		Type: DoctypeNode,
+		Data: strings.ToLower(s[:nameEnd]),
+	}
+	rest := strings.TrimLeft(s[nameEnd:], whitespace)
+
+	// Both keywords, "PUBLIC" and "SYSTEM", are six bytes long, so anything
+	// shorter cannot introduce an identifier and is ignored.
+	const keywordLen = 6
+	if len(rest) < keywordLen {
+		return doctype
+	}
+	keyword := strings.ToLower(rest[:keywordLen])
+	rest = rest[keywordLen:]
+
+	for keyword == "public" || keyword == "system" {
+		rest = strings.TrimLeft(rest, whitespace)
+		// An identifier has to open with a single or a double quote.
+		if rest == "" || (rest[0] != '"' && rest[0] != '\'') {
+			break
+		}
+		delim := rest[0]
+		rest = rest[1:]
+
+		// The identifier ends at the matching quote; an unterminated one
+		// swallows the remainder of the input.
+		ident := rest
+		if end := strings.IndexRune(rest, rune(delim)); end >= 0 {
+			ident, rest = rest[:end], rest[end+1:]
+		} else {
+			rest = ""
+		}
+		doctype.Attr = append(doctype.Attr, Attribute{Key: keyword, Val: ident})
+
+		// A public identifier may be followed by a system identifier;
+		// nothing is parsed after a system identifier.
+		if keyword != "public" {
+			break
+		}
+		keyword = "system"
+	}
+
+	return doctype
+}
+
// Section 11.2.5.4.1.
func initialIM(p *parser) bool {
switch p.tok.Type {
})
return true
case DoctypeToken:
- p.doc.Add(&Node{
- Type: DoctypeNode,
- Data: p.tok.Data,
- })
+ p.doc.Add(parseDoctype(p.tok.Data))
p.im = beforeHTMLIM
return true
}
case CommentNode:
fmt.Fprintf(w, "<!-- %s -->", n.Data)
case DoctypeNode:
- fmt.Fprintf(w, "<!DOCTYPE %s>", n.Data)
+ fmt.Fprintf(w, "<!DOCTYPE %s", n.Data)
+ if n.Attr != nil {
+ var p, s string
+ for _, a := range n.Attr {
+ switch a.Key {
+ case "public":
+ p = a.Val
+ case "system":
+ s = a.Val
+ }
+ }
+ if p != "" || s != "" {
+ fmt.Fprintf(w, ` "%s"`, p)
+ fmt.Fprintf(w, ` "%s"`, s)
+ }
+ }
+ io.WriteString(w, ">")
case scopeMarkerNode:
return errors.New("unexpected scopeMarkerNode")
default:
n int
}{
// TODO(nigeltao): Process all the test cases from all the .dat files.
+ {"doctype01.dat", -1},
{"tests1.dat", -1},
- {"tests2.dat", 59},
+ {"tests2.dat", -1},
{"tests3.dat", 0},
}
for _, tf := range testFiles {
"errors"
"fmt"
"io"
+ "strings"
)
type writer interface {
if _, err := w.WriteString(n.Data); err != nil {
return err
}
+ if n.Attr != nil {
+ var p, s string
+ for _, a := range n.Attr {
+ switch a.Key {
+ case "public":
+ p = a.Val
+ case "system":
+ s = a.Val
+ }
+ }
+ if p != "" {
+ if _, err := w.WriteString(" PUBLIC "); err != nil {
+ return err
+ }
+ if err := writeQuoted(w, p); err != nil {
+ return err
+ }
+ if s != "" {
+ if err := w.WriteByte(' '); err != nil {
+ return err
+ }
+ if err := writeQuoted(w, s); err != nil {
+ return err
+ }
+ }
+ } else if s != "" {
+ if _, err := w.WriteString(" SYSTEM "); err != nil {
+ return err
+ }
+ if err := writeQuoted(w, s); err != nil {
+ return err
+ }
+ }
+ }
return w.WriteByte('>')
default:
return errors.New("html: unknown node type")
return w.WriteByte('>')
}
+// writeQuoted writes s to w surrounded by quotes. Normally it will use double
+// quotes, but if s contains a double quote, it will use single quotes.
+// It is used for writing the identifiers in a doctype declaration.
+// In valid HTML, they can't contain both types of quotes.
+func writeQuoted(w writer, s string) error {
+ var q byte = '"'
+ if strings.Contains(s, `"`) {
+ q = '\''
+ }
+ if err := w.WriteByte(q); err != nil {
+ return err
+ }
+ if _, err := w.WriteString(s); err != nil {
+ return err
+ }
+ if err := w.WriteByte(q); err != nil {
+ return err
+ }
+ return nil
+}
+
// Section 13.1.2, "Elements", gives this list of void elements. Void elements
// are those that can't have any contents.
var voidElements = map[string]bool{