Cypherpunks repositories - gostls13.git/commitdiff
html: spin doctype.go out of parse.go.
author Nigel Tao <nigeltao@golang.org>
Tue, 29 Nov 2011 07:20:59 +0000 (18:20 +1100)
committer Nigel Tao <nigeltao@golang.org>
Tue, 29 Nov 2011 07:20:59 +0000 (18:20 +1100)
R=andybalholm
CC=golang-dev
https://golang.org/cl/5445049

src/pkg/html/Makefile
src/pkg/html/doctype.go [new file with mode: 0644]
src/pkg/html/parse.go

index 2d664720d3f0aea00f7c87788591c99330cc4c75..3c3de8ee310921b19f33e7dc7964468581355e21 100644 (file)
@@ -8,6 +8,7 @@ TARG=html
 GOFILES=\
        const.go\
        doc.go\
+       doctype.go\
        entity.go\
        escape.go\
        node.go\
diff --git a/src/pkg/html/doctype.go b/src/pkg/html/doctype.go
new file mode 100644 (file)
index 0000000..f692061
--- /dev/null
@@ -0,0 +1,156 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "strings"
+)
+
+// parseDoctype parses the data from a DoctypeToken into a name,
+// public identifier, and system identifier. It returns a Node whose Type 
+// is DoctypeNode, whose Data is the name, and which has attributes
+// named "system" and "public" for the two identifiers if they were present.
+// quirks is whether the document should be parsed in "quirks mode".
+func parseDoctype(s string) (n *Node, quirks bool) {
+       n = &Node{Type: DoctypeNode}
+
+       // Find the name.
+       space := strings.IndexAny(s, whitespace)
+       if space == -1 {
+               space = len(s)
+       }
+       n.Data = s[:space]
+       // The comparison to "html" is case-sensitive.
+       if n.Data != "html" {
+               quirks = true
+       }
+       n.Data = strings.ToLower(n.Data)
+       s = strings.TrimLeft(s[space:], whitespace)
+
+       if len(s) < 6 {
+               // It can't start with "PUBLIC" or "SYSTEM".
+               // Ignore the rest of the string.
+               return n, quirks || s != ""
+       }
+
+       key := strings.ToLower(s[:6])
+       s = s[6:]
+       for key == "public" || key == "system" {
+               s = strings.TrimLeft(s, whitespace)
+               if s == "" {
+                       break
+               }
+               quote := s[0]
+               if quote != '"' && quote != '\'' {
+                       break
+               }
+               s = s[1:]
+               q := strings.IndexRune(s, rune(quote))
+               var id string
+               if q == -1 {
+                       id = s
+                       s = ""
+               } else {
+                       id = s[:q]
+                       s = s[q+1:]
+               }
+               n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
+               if key == "public" {
+                       key = "system"
+               } else {
+                       key = ""
+               }
+       }
+
+       if key != "" || s != "" {
+               quirks = true
+       } else if len(n.Attr) > 0 {
+               if n.Attr[0].Key == "public" {
+                       public := strings.ToLower(n.Attr[0].Val)
+                       switch public {
+                       case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html":
+                               quirks = true
+                       default:
+                               for _, q := range quirkyIDs {
+                                       if strings.HasPrefix(public, q) {
+                                               quirks = true
+                                               break
+                                       }
+                               }
+                       }
+                       // The following two public IDs only cause quirks mode if there is no system ID.
+                       if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
+                               strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
+                               quirks = true
+                       }
+               }
+               if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
+                       strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
+                       quirks = true
+               }
+       }
+
+       return n, quirks
+}
+
+// quirkyIDs is a list of public doctype identifiers that cause a document
+// to be interpreted in quirks mode. The identifiers should be in lower case.
+var quirkyIDs = []string{
+       "+//silmaril//dtd html pro v0r11 19970101//",
+       "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+       "-//as//dtd html 3.0 aswedit + extensions//",
+       "-//ietf//dtd html 2.0 level 1//",
+       "-//ietf//dtd html 2.0 level 2//",
+       "-//ietf//dtd html 2.0 strict level 1//",
+       "-//ietf//dtd html 2.0 strict level 2//",
+       "-//ietf//dtd html 2.0 strict//",
+       "-//ietf//dtd html 2.0//",
+       "-//ietf//dtd html 2.1e//",
+       "-//ietf//dtd html 3.0//",
+       "-//ietf//dtd html 3.2 final//",
+       "-//ietf//dtd html 3.2//",
+       "-//ietf//dtd html 3//",
+       "-//ietf//dtd html level 0//",
+       "-//ietf//dtd html level 1//",
+       "-//ietf//dtd html level 2//",
+       "-//ietf//dtd html level 3//",
+       "-//ietf//dtd html strict level 0//",
+       "-//ietf//dtd html strict level 1//",
+       "-//ietf//dtd html strict level 2//",
+       "-//ietf//dtd html strict level 3//",
+       "-//ietf//dtd html strict//",
+       "-//ietf//dtd html//",
+       "-//metrius//dtd metrius presentational//",
+       "-//microsoft//dtd internet explorer 2.0 html strict//",
+       "-//microsoft//dtd internet explorer 2.0 html//",
+       "-//microsoft//dtd internet explorer 2.0 tables//",
+       "-//microsoft//dtd internet explorer 3.0 html strict//",
+       "-//microsoft//dtd internet explorer 3.0 html//",
+       "-//microsoft//dtd internet explorer 3.0 tables//",
+       "-//netscape comm. corp.//dtd html//",
+       "-//netscape comm. corp.//dtd strict html//",
+       "-//o'reilly and associates//dtd html 2.0//",
+       "-//o'reilly and associates//dtd html extended 1.0//",
+       "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+       "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+       "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+       "-//spyglass//dtd html 2.0 extended//",
+       "-//sq//dtd html 2.0 hotmetal + extensions//",
+       "-//sun microsystems corp.//dtd hotjava html//",
+       "-//sun microsystems corp.//dtd hotjava strict html//",
+       "-//w3c//dtd html 3 1995-03-24//",
+       "-//w3c//dtd html 3.2 draft//",
+       "-//w3c//dtd html 3.2 final//",
+       "-//w3c//dtd html 3.2//",
+       "-//w3c//dtd html 3.2s draft//",
+       "-//w3c//dtd html 4.0 frameset//",
+       "-//w3c//dtd html 4.0 transitional//",
+       "-//w3c//dtd html experimental 19960712//",
+       "-//w3c//dtd html experimental 970421//",
+       "-//w3c//dtd w3 html//",
+       "-//w3o//dtd w3 html 3.0//",
+       "-//webtechs//dtd mozilla html 2.0//",
+       "-//webtechs//dtd mozilla html//",
+}
index bb9fdcea5966c8e9272cd41237fc0d0229ac0e44..3011064e74f16abd68c925c445e765e94344efbd 100644 (file)
@@ -323,153 +323,6 @@ func (p *parser) resetInsertionMode() {
 
 const whitespace = " \t\r\n\f"
 
-// quirkyIDs is a list of public doctype identifiers that cause a document
-// to be interpreted in quirks mode. The identifiers should be in lower case.
-var quirkyIDs = []string{
-       "+//silmaril//dtd html pro v0r11 19970101//",
-       "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
-       "-//as//dtd html 3.0 aswedit + extensions//",
-       "-//ietf//dtd html 2.0 level 1//",
-       "-//ietf//dtd html 2.0 level 2//",
-       "-//ietf//dtd html 2.0 strict level 1//",
-       "-//ietf//dtd html 2.0 strict level 2//",
-       "-//ietf//dtd html 2.0 strict//",
-       "-//ietf//dtd html 2.0//",
-       "-//ietf//dtd html 2.1e//",
-       "-//ietf//dtd html 3.0//",
-       "-//ietf//dtd html 3.2 final//",
-       "-//ietf//dtd html 3.2//",
-       "-//ietf//dtd html 3//",
-       "-//ietf//dtd html level 0//",
-       "-//ietf//dtd html level 1//",
-       "-//ietf//dtd html level 2//",
-       "-//ietf//dtd html level 3//",
-       "-//ietf//dtd html strict level 0//",
-       "-//ietf//dtd html strict level 1//",
-       "-//ietf//dtd html strict level 2//",
-       "-//ietf//dtd html strict level 3//",
-       "-//ietf//dtd html strict//",
-       "-//ietf//dtd html//",
-       "-//metrius//dtd metrius presentational//",
-       "-//microsoft//dtd internet explorer 2.0 html strict//",
-       "-//microsoft//dtd internet explorer 2.0 html//",
-       "-//microsoft//dtd internet explorer 2.0 tables//",
-       "-//microsoft//dtd internet explorer 3.0 html strict//",
-       "-//microsoft//dtd internet explorer 3.0 html//",
-       "-//microsoft//dtd internet explorer 3.0 tables//",
-       "-//netscape comm. corp.//dtd html//",
-       "-//netscape comm. corp.//dtd strict html//",
-       "-//o'reilly and associates//dtd html 2.0//",
-       "-//o'reilly and associates//dtd html extended 1.0//",
-       "-//o'reilly and associates//dtd html extended relaxed 1.0//",
-       "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
-       "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
-       "-//spyglass//dtd html 2.0 extended//",
-       "-//sq//dtd html 2.0 hotmetal + extensions//",
-       "-//sun microsystems corp.//dtd hotjava html//",
-       "-//sun microsystems corp.//dtd hotjava strict html//",
-       "-//w3c//dtd html 3 1995-03-24//",
-       "-//w3c//dtd html 3.2 draft//",
-       "-//w3c//dtd html 3.2 final//",
-       "-//w3c//dtd html 3.2//",
-       "-//w3c//dtd html 3.2s draft//",
-       "-//w3c//dtd html 4.0 frameset//",
-       "-//w3c//dtd html 4.0 transitional//",
-       "-//w3c//dtd html experimental 19960712//",
-       "-//w3c//dtd html experimental 970421//",
-       "-//w3c//dtd w3 html//",
-       "-//w3o//dtd w3 html 3.0//",
-       "-//webtechs//dtd mozilla html 2.0//",
-       "-//webtechs//dtd mozilla html//",
-}
-
-// parseDoctype parses the data from a DoctypeToken into a name,
-// public identifier, and system identifier. It returns a Node whose Type 
-// is DoctypeNode, whose Data is the name, and which has attributes
-// named "system" and "public" for the two identifiers if they were present.
-// quirks is whether the document should be parsed in "quirks mode".
-func parseDoctype(s string) (n *Node, quirks bool) {
-       n = &Node{Type: DoctypeNode}
-
-       // Find the name.
-       space := strings.IndexAny(s, whitespace)
-       if space == -1 {
-               space = len(s)
-       }
-       n.Data = s[:space]
-       // The comparison to "html" is case-sensitive.
-       if n.Data != "html" {
-               quirks = true
-       }
-       n.Data = strings.ToLower(n.Data)
-       s = strings.TrimLeft(s[space:], whitespace)
-
-       if len(s) < 6 {
-               // It can't start with "PUBLIC" or "SYSTEM".
-               // Ignore the rest of the string.
-               return n, quirks || s != ""
-       }
-
-       key := strings.ToLower(s[:6])
-       s = s[6:]
-       for key == "public" || key == "system" {
-               s = strings.TrimLeft(s, whitespace)
-               if s == "" {
-                       break
-               }
-               quote := s[0]
-               if quote != '"' && quote != '\'' {
-                       break
-               }
-               s = s[1:]
-               q := strings.IndexRune(s, rune(quote))
-               var id string
-               if q == -1 {
-                       id = s
-                       s = ""
-               } else {
-                       id = s[:q]
-                       s = s[q+1:]
-               }
-               n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
-               if key == "public" {
-                       key = "system"
-               } else {
-                       key = ""
-               }
-       }
-
-       if key != "" || s != "" {
-               quirks = true
-       } else if len(n.Attr) > 0 {
-               if n.Attr[0].Key == "public" {
-                       public := strings.ToLower(n.Attr[0].Val)
-                       switch public {
-                       case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html":
-                               quirks = true
-                       default:
-                               for _, q := range quirkyIDs {
-                                       if strings.HasPrefix(public, q) {
-                                               quirks = true
-                                               break
-                                       }
-                               }
-                       }
-                       // The following two public IDs only cause quirks mode if there is no system ID.
-                       if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
-                               strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
-                               quirks = true
-                       }
-               }
-               if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
-                       strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
-                       quirks = true
-               }
-       }
-
-       return n, quirks
-}
-
 // Section 11.2.5.4.1.
 func initialIM(p *parser) bool {
        switch p.tok.Type {