From: Russ Cox Date: Fri, 12 Jul 2013 21:37:10 +0000 (-0400) Subject: encoding/json: coerce invalid UTF-8 to valid UTF-8 during Marshal X-Git-Tag: go1.2rc2~1060 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=64054a40ad0d85e82f77a4982ea4ee08c3cea40a;p=gostls13.git encoding/json: coerce invalid UTF-8 to valid UTF-8 during Marshal In practice, rejecting an entire structure due to a single invalid byte in a string is just too picky, and too hard to track down. Be consistent with the bulk of the standard library by converting invalid UTF-8 into UTF-8 with replacement runes. R=golang-dev, crawshaw CC=golang-dev https://golang.org/cl/11211045 --- diff --git a/doc/go1.2.txt b/doc/go1.2.txt index a4f946c083..079b4f762b 100644 --- a/doc/go1.2.txt +++ b/doc/go1.2.txt @@ -17,6 +17,7 @@ crypto/sha1: Sum function to simplify hashing (CL 10571043). crypto/sha256: Sum256 and Sum224 functions to simplify hashing (CL 10629043). crypto/sha512: Sum512 and Sum384 functions to simplify hashing (CL 10630043). crypto/tls: add support for TLS 1.1. (CL 7872043). +encoding/json: accept but correct invalid UTF-8 in Marshal (CL 11211045). flag: add Getter interface (CL 10472043). fmt: indexed access to arguments in Printf etc. (CL 9680043). go/build: support including C++ code with cgo (CL 8248043). diff --git a/src/pkg/encoding/json/decode_test.go b/src/pkg/encoding/json/decode_test.go index 1191d6cee5..dfc688cdc4 100644 --- a/src/pkg/encoding/json/decode_test.go +++ b/src/pkg/encoding/json/decode_test.go @@ -393,15 +393,10 @@ func TestMarshal(t *testing.T) { func TestMarshalBadUTF8(t *testing.T) { s := "hello\xffworld" + const enc = `"hello\ufffdworld"` b, err := Marshal(s) - if err == nil { - t.Fatal("Marshal bad UTF8: no error") - } - if len(b) != 0 { - t.Fatal("Marshal returned data") - } - if _, ok := err.(*InvalidUTF8Error); !ok { - t.Fatalf("Marshal did not return InvalidUTF8Error: %T %v", err, err) + if string(b) != enc || err != nil { + t.Errorf("Marshal(%q) = %#q, %v, want %#q, nil", s, b, err, enc) } } diff --git a/src/pkg/encoding/json/encode.go b/src/pkg/encoding/json/encode.go index 55df9b5768..7cc9398c97 100644 --- a/src/pkg/encoding/json/encode.go +++ b/src/pkg/encoding/json/encode.go @@ -209,8 +209,12 @@ func (e *UnsupportedValueError) Error() string { return "json: unsupported value: " + e.Str } -// An InvalidUTF8Error is returned by Marshal when attempting -// to encode a string value with invalid UTF-8 sequences. +// Before Go 1.2, an InvalidUTF8Error was returned by Marshal when +// attempting to encode a string value with invalid UTF-8 sequences. +// As of Go 1.2, Marshal instead coerces the string to valid UTF-8 by +// replacing invalid bytes with the Unicode replacement rune U+FFFD. +// This error is no longer generated but is kept for backwards compatibility +// with programs that might mention it. type InvalidUTF8Error struct { S string // the whole string value that caused the error } @@ -555,7 +559,13 @@ func (e *encodeState) string(s string) (int, error) { } c, size := utf8.DecodeRuneInString(s[i:]) if c == utf8.RuneError && size == 1 { - e.error(&InvalidUTF8Error{s}) + if start < i { + e.WriteString(s[start:i]) + } + e.WriteString(`\ufffd`) + i += size + start = i + continue } // U+2028 is LINE SEPARATOR. // U+2029 is PARAGRAPH SEPARATOR.