html: parse the contents of <iframe> elements as raw text

author Andrew Balholm <andybalholm@gmail.com>

Wed, 30 Nov 2011 00:44:54 +0000 (11:44 +1100)

committer Nigel Tao <nigeltao@golang.org>

Wed, 30 Nov 2011 00:44:54 +0000 (11:44 +1100)
author Andrew Balholm <andybalholm@gmail.com>
Wed, 30 Nov 2011 00:44:54 +0000 (11:44 +1100)
committer Nigel Tao <nigeltao@golang.org>
Wed, 30 Nov 2011 00:44:54 +0000 (11:44 +1100)
diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go

index 3805bd7e9e9deb0b2db6fe48ea13464369af1ce6..1e39f3ed70bc79318ef4a27b889903fb84e3c9c2 100644 (file)
--- a/src/pkg/html/parse_test.go
+++ b/src/pkg/html/parse_test.go
@@ -153,6 +153,8 @@ func TestParser(t *testing.T) {
                 {"tests1.dat", -1},
                 {"tests2.dat", -1},
                 {"tests3.dat", -1},
+               // tests4.dat is fragment cases.
+               {"tests5.dat", 10},
         }
         for _, tf := range testFiles {
                 f, err := os.Open("testdata/webkit/" + tf.filename)
diff --git a/src/pkg/html/render.go b/src/pkg/html/render.go

index 2c868f511d7082a3d43a2d9ea201ab4f24e46c8f..2a57566fd43373674ec0940b62be508725efdca7 100644 (file)
--- a/src/pkg/html/render.go
+++ b/src/pkg/html/render.go
@@ -185,7 +185,7 @@ func render1(w writer, n *Node) error {
  
         // Render any child nodes.
         switch n.Data {
-       case "noembed", "noframes", "noscript", "plaintext", "script", "style":
+       case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style":
                 for _, c := range n.Child {
                         if c.Type != TextNode {
                                 return fmt.Errorf("html: raw text element <%s> has non-text child node", n.Data)
diff --git a/src/pkg/html/token.go b/src/pkg/html/token.go

index a6fbcdfcfe52ef785779985e69171e51ce9d4635..2a2f96bbab63fa6a20ecb5ea042ceb09ecee299b 100644 (file)
--- a/src/pkg/html/token.go
+++ b/src/pkg/html/token.go
@@ -405,14 +405,13 @@ func (z *Tokenizer) readStartTag() TokenType {
                         break
                 }
         }
-       // Any "<noembed>", "<noframes>", "<noscript>", "<plaintext", "<script>", "<style>",
-       // "<textarea>" or "<title>" tag flags the tokenizer's next token as raw.
+       // Several tags flag the tokenizer's next token as raw.
         // The tag name lengths of these special cases ranges in [5, 9].
         if x := z.data.end - z.data.start; 5 <= x && x <= 9 {
                 switch z.buf[z.data.start] {
-               case 'n', 'p', 's', 't', 'N', 'P', 'S', 'T':
+               case 'i', 'n', 'p', 's', 't', 'I', 'N', 'P', 'S', 'T':
                         switch s := strings.ToLower(string(z.buf[z.data.start:z.data.end])); s {
-                       case "noembed", "noframes", "noscript", "plaintext", "script", "style", "textarea", "title":
+                       case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "textarea", "title":
                                 z.rawTag = s
                         }
                 }
author	Andrew Balholm <andybalholm@gmail.com>
	Wed, 30 Nov 2011 00:44:54 +0000 (11:44 +1100)
committer	Nigel Tao <nigeltao@golang.org>
	Wed, 30 Nov 2011 00:44:54 +0000 (11:44 +1100)
src/pkg/html/parse_test.go		patch | blob | history
src/pkg/html/render.go		patch | blob | history
src/pkg/html/token.go		patch | blob | history