Cypherpunks repositories - gostls13.git/commitdiff
exp/html: Convert \r and \r\n to \n when tokenizing
author Andrew Balholm <andybalholm@gmail.com>
Wed, 30 May 2012 05:50:12 +0000 (15:50 +1000)
committer Nigel Tao <nigeltao@golang.org>
Wed, 30 May 2012 05:50:12 +0000 (15:50 +1000)
Also escape "\r" as "&#13;" when rendering HTML.

Pass 2 additional tests.

R=nigeltao
CC=golang-dev
https://golang.org/cl/6260046

src/pkg/exp/html/escape.go
src/pkg/exp/html/testlogs/plain-text-unsafe.dat.log
src/pkg/exp/html/token.go
src/pkg/exp/html/token_test.go

index c177a66068e38050141d4744c1acb74b27eb10dd..6a9d8f6e6ff09eeffa188e290e865f68db0c6530 100644 (file)
@@ -192,7 +192,7 @@ func lower(b []byte) []byte {
        return b
 }
 
-const escapedChars = `&'<>"`
+const escapedChars = "&'<>\"\r"
 
 func escape(w writer, s string) error {
        i := strings.IndexAny(s, escapedChars)
@@ -214,6 +214,8 @@ func escape(w writer, s string) error {
                case '"':
                        // "&#34;" is shorter than "&quot;".
                        esc = "&#34;"
+               case '\r':
+                       esc = "&#13;"
                default:
                        panic("unrecognized escape character")
                }
index acf1780cf2bee5e4db3e06152c279fe77fd1b15c..1d8aee842354cc404ec0725eb48116ee3716a4c4 100644 (file)
@@ -21,8 +21,8 @@ PASS "<svg>\x00 </svg><frameset>"
 FAIL "<svg>\x00a</svg><frameset>"
 PASS "<svg><path></path></svg><frameset>"
 PASS "<svg><p><frameset>"
-FAIL "<!DOCTYPE html><pre>\r\n\r\nA</pre>"
-FAIL "<!DOCTYPE html><pre>\r\rA</pre>"
+PASS "<!DOCTYPE html><pre>\r\n\r\nA</pre>"
+PASS "<!DOCTYPE html><pre>\r\rA</pre>"
 PASS "<!DOCTYPE html><pre>\rA</pre>"
 PASS "<!DOCTYPE html><table><tr><td><math><mtext>\x00a"
 PASS "<!DOCTYPE html><table><tr><td><svg><foreignObject>\x00a"
index b5e9c2d6ea34c274effe074a8fbce9d113230019..c9ab6e0761ad99ef7d6d05f4a1ca726128eb5947 100644 (file)
@@ -696,6 +696,38 @@ func (z *Tokenizer) Raw() []byte {
        return z.buf[z.raw.start:z.raw.end]
 }
 
+// convertNewlines rewrites every "\r" and "\r\n" in s as a single "\n".
+// It edits s in place and returns it, shortened if any "\r\n" pair was collapsed.
+func convertNewlines(s []byte) []byte {
+       for i, c := range s { // scan for the first "\r\n"; lone "\r"s are patched along the way
+               if c != '\r' {
+                       continue
+               }
+
+               src := i + 1 // index of the byte following this "\r"
+               if src >= len(s) || s[src] != '\n' {
+                       s[i] = '\n' // lone "\r": length is unchanged, so just overwrite it
+                       continue
+               }
+               // First "\r\n" found: compact the rest of s, reading at src and writing at dst.
+               dst := i // on the first pass the "\n" at src overwrites this "\r"
+               for src < len(s) {
+                       if s[src] == '\r' {
+                               if src+1 < len(s) && s[src+1] == '\n' {
+                                       src++ // "\r\n": skip the "\n" too so the pair yields one byte
+                               }
+                               s[dst] = '\n'
+                       } else {
+                               s[dst] = s[src]
+                       }
+                       src++
+                       dst++
+               }
+               return s[:dst] // drop the bytes freed by the collapsed pairs
+       }
+       return s // no "\r\n" was seen, so the length is unchanged
+}
+
 // Text returns the unescaped text of a text, comment or doctype token. The
 // contents of the returned slice may change on the next call to Next.
 func (z *Tokenizer) Text() []byte {
@@ -704,6 +736,7 @@ func (z *Tokenizer) Text() []byte {
                s := z.buf[z.data.start:z.data.end]
                z.data.start = z.raw.end
                z.data.end = z.raw.end
+               s = convertNewlines(s)
                if !z.textIsRaw {
                        s = unescape(s)
                }
@@ -739,7 +772,7 @@ func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
                        z.nAttrReturned++
                        key = z.buf[x[0].start:x[0].end]
                        val = z.buf[x[1].start:x[1].end]
-                       return lower(key), unescape(val), z.nAttrReturned < len(z.attr)
+                       return lower(key), unescape(convertNewlines(val)), z.nAttrReturned < len(z.attr)
                }
        }
        return nil, nil, false
index 942bbc8cf477dc4e53da3fc68c85258acf4087c4..a802bf3f2cd3107de3ca3a0746913349120c9cd1 100644 (file)
@@ -592,6 +592,33 @@ loop:
        }
 }
 
+func TestConvertNewlines(t *testing.T) {
+       testCases := map[string]string{ // input -> expected output of convertNewlines
+               "Mac\rDOS\r\nUnix\n":    "Mac\nDOS\nUnix\n",
+               "Unix\nMac\rDOS\r\n":    "Unix\nMac\nDOS\n",
+               "DOS\r\nDOS\r\nDOS\r\n": "DOS\nDOS\nDOS\n",
+               "":                      "",
+               "\n":                    "\n",
+               "\n\r":                  "\n\n",
+               "\r":                    "\n",
+               "\r\n":                  "\n",
+               "\r\n\n":                "\n\n",
+               "\r\n\r":                "\n\n",
+               "\r\n\r\n":              "\n\n",
+               "\r\r":                  "\n\n",
+               "\r\r\n":                "\n\n",
+               "\r\r\n\n":              "\n\n\n",
+               "\r\r\r\n":              "\n\n\n",
+               "\r \n":                 "\n \n",
+               "xyz":                   "xyz",
+       }
+       for in, want := range testCases {
+               if got := string(convertNewlines([]byte(in))); got != want { // []byte(in) gives each case its own buffer to convert in place
+                       t.Errorf("input %q: got %q, want %q", in, got, want)
+               }
+       }
+}
+
 const (
        rawLevel = iota
        lowLevel