Cypherpunks repositories - gostls13.git/commitdiff
encoding/json: document and test use of unicode.ReplacementChar
author: Russ Cox <rsc@golang.org>
Thu, 14 Feb 2013 19:56:01 +0000 (14:56 -0500)
committer: Russ Cox <rsc@golang.org>
Thu, 14 Feb 2013 19:56:01 +0000 (14:56 -0500)
Fixes #4783.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/7314099

src/pkg/encoding/json/decode.go
src/pkg/encoding/json/decode_test.go

index ffe9d77b7ab112fe6168ca48bb462c5b810f23b1..f2ec9cb67240cd3ef4472510062b5bc3e2ee4df4 100644 (file)
@@ -55,6 +55,11 @@ import (
 // If no more serious errors are encountered, Unmarshal returns
 // an UnmarshalTypeError describing the earliest such error.
 //
+// When unmarshaling quoted strings, invalid UTF-8 or
+// invalid UTF-16 surrogate pairs are not treated as an error.
+// Instead, they are replaced by the Unicode replacement
+// character U+FFFD.
+//
 func Unmarshal(data []byte, v interface{}) error {
        // Check for well-formedness.
        // Avoids filling out half a data structure
index 524a9989fe74743b76bd09d317f0ffbd7475ec12..1ce26f8fb300161cc63f71f67b1b4743b49a6f2d 100644 (file)
@@ -330,6 +330,43 @@ var unmarshalTests = []unmarshalTest{
                ptr: new(S10),
                out: S10{S13: S13{S8: S8{S9: S9{Y: 2}}}},
        },
+
+       // invalid UTF-8 is coerced to valid UTF-8.
+       {
+               in:  "\"hello\xffworld\"",
+               ptr: new(string),
+               out: "hello\ufffdworld",
+       },
+       {
+               in:  "\"hello\xc2\xc2world\"",
+               ptr: new(string),
+               out: "hello\ufffd\ufffdworld",
+       },
+       {
+               in:  "\"hello\xc2\xffworld\"",
+               ptr: new(string),
+               out: "hello\ufffd\ufffdworld",
+       },
+       {
+               in:  "\"hello\\ud800world\"",
+               ptr: new(string),
+               out: "hello\ufffdworld",
+       },
+       {
+               in:  "\"hello\\ud800\\ud800world\"",
+               ptr: new(string),
+               out: "hello\ufffd\ufffdworld",
+       },
+       {
+               in:  "\"hello\\ud800\\ud800world\"",
+               ptr: new(string),
+               out: "hello\ufffd\ufffdworld",
+       },
+       {
+               in:  "\"hello\xed\xa0\x80\xed\xb0\x80world\"",
+               ptr: new(string),
+               out: "hello\ufffd\ufffd\ufffd\ufffd\ufffd\ufffdworld",
+       },
 }
 
 func TestMarshal(t *testing.T) {