encoding/json: escape U+2028 and U+2029.

author David Symonds <dsymonds@golang.org>

Fri, 12 Jul 2013 04:35:55 +0000 (14:35 +1000)

committer David Symonds <dsymonds@golang.org>

Fri, 12 Jul 2013 04:35:55 +0000 (14:35 +1000)
author David Symonds <dsymonds@golang.org>
Fri, 12 Jul 2013 04:35:55 +0000 (14:35 +1000)
committer David Symonds <dsymonds@golang.org>
Fri, 12 Jul 2013 04:35:55 +0000 (14:35 +1000)
diff --git a/src/pkg/encoding/json/decode_test.go b/src/pkg/encoding/json/decode_test.go

index 97cbb4f09ba176e8d42527fef369df529382a62c..1191d6cee5048264d384427161c9b0bae4646d0a 100644 (file)
--- a/src/pkg/encoding/json/decode_test.go
+++ b/src/pkg/encoding/json/decode_test.go
@@ -568,14 +568,14 @@ func TestUnmarshalPtrPtr(t *testing.T) {
  }
  
  func TestEscape(t *testing.T) {
-       const input = `"foobar"<html>`
-       const expected = `"\"foobar\"\u003chtml\u003e"`
+       const input = `"foobar"<html>` + " [\u2028 \u2029]"
+       const expected = `"\"foobar\"\u003chtml\u003e [\u2028 \u2029]"`
         b, err := Marshal(input)
         if err != nil {
                 t.Fatalf("Marshal error: %v", err)
         }
         if s := string(b); s != expected {
-               t.Errorf("Encoding of [%s] was [%s], want [%s]", input, s, expected)
+               t.Errorf("Encoding of [%s]:\n got [%s]\nwant [%s]", input, s, expected)
         }
  }
  
diff --git a/src/pkg/encoding/json/encode.go b/src/pkg/encoding/json/encode.go

index e25a9b88058a9e9e01d207eae199e2c388bf6e37..55df9b5768b02003e484a5a4f52cd3b9efc03f6f 100644 (file)
--- a/src/pkg/encoding/json/encode.go
+++ b/src/pkg/encoding/json/encode.go
@@ -149,14 +149,14 @@ func MarshalIndent(v interface{}, prefix, indent string) ([]byte, error) {
         return buf.Bytes(), nil
  }
  
-// HTMLEscape appends to dst the JSON-encoded src with <, >, and &
-// characters inside string literals changed to \u003c, \u003e, \u0026
+// HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029
+// characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029
  // so that the JSON will be safe to embed inside HTML <script> tags.
  // For historical reasons, web browsers don't honor standard HTML
  // escaping within <script> tags, so an alternative JSON encoding must
  // be used.
  func HTMLEscape(dst *bytes.Buffer, src []byte) {
-       // < > & can only appear in string literals,
+       // The characters can only appear in string literals,
         // so just scan the string one byte at a time.
         start := 0
         for i, c := range src {
@@ -169,6 +169,15 @@ func HTMLEscape(dst *bytes.Buffer, src []byte) {
                         dst.WriteByte(hex[c&0xF])
                         start = i + 1
                 }
+               // Convert U+2028 and U+2029 (E2 80 A8 and E2 80 A9).
+               if c == 0xE2 && i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8 {
+                       if start < i {
+                               dst.Write(src[start:i])
+                       }
+                       dst.WriteString(`\u202`)
+                       dst.WriteByte(hex[src[i+2]&0xF])
+                       start = i + 3
+               }
         }
         if start < len(src) {
                 dst.Write(src[start:])
@@ -548,6 +557,23 @@ func (e *encodeState) string(s string) (int, error) {
                 if c == utf8.RuneError && size == 1 {
                         e.error(&InvalidUTF8Error{s})
                 }
+               // U+2028 is LINE SEPARATOR.
+               // U+2029 is PARAGRAPH SEPARATOR.
+               // They are both technically valid characters in JSON strings,
+               // but don't work in JSONP, which has to be evaluated as JavaScript,
+               // and can lead to security holes there. It is valid JSON to
+               // escape them, so we do so unconditionally.
+               // See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
+               if c == '\u2028' || c == '\u2029' {
+                       if start < i {
+                               e.WriteString(s[start:i])
+                       }
+                       e.WriteString(`\u202`)
+                       e.WriteByte(hex[c&0xF])
+                       i += size
+                       start = i
+                       continue
+               }
                 i += size
         }
         if start < len(s) {
diff --git a/src/pkg/encoding/json/indent.go b/src/pkg/encoding/json/indent.go

index e8dfa4ec43630bb745c0e01346caa6999d51db02..11ef709cce7ce9478c1cfef733c4f59c3978ac6a 100644 (file)
--- a/src/pkg/encoding/json/indent.go
+++ b/src/pkg/encoding/json/indent.go
@@ -27,6 +27,15 @@ func compact(dst *bytes.Buffer, src []byte, escape bool) error {
                         dst.WriteByte(hex[c&0xF])
                         start = i + 1
                 }
+               // Convert U+2028 and U+2029 (E2 80 A8 and E2 80 A9).
+               if c == 0xE2 && i+2 < len(src) && src[i+1] == 0x80 && src[i+2]&^1 == 0xA8 {
+                       if start < i {
+                               dst.Write(src[start:i])
+                       }
+                       dst.WriteString(`\u202`)
+                       dst.WriteByte(hex[src[i+2]&0xF])
+                       start = i + 3
+               }
                 v := scan.step(&scan, int(c))
                 if v >= scanSkipSpace {
                         if v == scanError {
diff --git a/src/pkg/encoding/json/scanner_test.go b/src/pkg/encoding/json/scanner_test.go

index 77d3455d3073747d663c3043a4e0b3d9a8f3bcab..90e45ff0369081d2f97618cb228c21bcefdd797e 100644 (file)
--- a/src/pkg/encoding/json/scanner_test.go
+++ b/src/pkg/encoding/json/scanner_test.go
@@ -63,6 +63,25 @@ func TestCompact(t *testing.T) {
         }
  }
  
+func TestCompactSeparators(t *testing.T) {
+       // U+2028 and U+2029 should be escaped inside strings.
+       // They should not appear outside strings.
+       tests := []struct {
+               in, compact string
+       }{
+               {"{\"\u2028\": 1}", `{"\u2028":1}`},
+               {"{\"\u2029\" :2}", `{"\u2029":2}`},
+       }
+       for _, tt := range tests {
+               var buf bytes.Buffer
+               if err := Compact(&buf, []byte(tt.in)); err != nil {
+                       t.Errorf("Compact(%q): %v", tt.in, err)
+               } else if s := buf.String(); s != tt.compact {
+                       t.Errorf("Compact(%q) = %q, want %q", tt.in, s, tt.compact)
+               }
+       }
+}
+
  func TestIndent(t *testing.T) {
         var buf bytes.Buffer
         for _, tt := range examples {
author	David Symonds <dsymonds@golang.org>
	Fri, 12 Jul 2013 04:35:55 +0000 (14:35 +1000)
committer	David Symonds <dsymonds@golang.org>
	Fri, 12 Jul 2013 04:35:55 +0000 (14:35 +1000)
src/pkg/encoding/json/decode_test.go		patch | blob | history
src/pkg/encoding/json/encode.go		patch | blob | history
src/pkg/encoding/json/indent.go		patch | blob | history
src/pkg/encoding/json/scanner_test.go		patch | blob | history