From 287e45e2418fa2d22e1cea22b6f9a5b0e1659bb5 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Mon, 13 Dec 2010 15:51:11 -0500 Subject: [PATCH] json: check for invalid UTF-8 Fixes #1250. R=r CC=golang-dev https://golang.org/cl/3562042 --- src/pkg/json/decode_test.go | 14 +++++++++++ src/pkg/json/encode.go | 49 ++++++++++++++++++++++++++++--------- 2 files changed, 52 insertions(+), 11 deletions(-) diff --git a/src/pkg/json/decode_test.go b/src/pkg/json/decode_test.go index b805d3d82f..92c1cafa70 100644 --- a/src/pkg/json/decode_test.go +++ b/src/pkg/json/decode_test.go @@ -102,6 +102,20 @@ func TestMarshal(t *testing.T) { } } +func TestMarshalBadUTF8(t *testing.T) { + s := "hello\xffworld" + b, err := Marshal(s) + if err == nil { + t.Fatal("Marshal bad UTF8: no error") + } + if len(b) != 0 { + t.Fatal("Marshal returned data") + } + if _, ok := err.(*InvalidUTF8Error); !ok { + t.Fatal("Marshal did not return InvalidUTF8Error: %T %v", err, err) + } +} + func TestUnmarshal(t *testing.T) { var scan scanner for i, tt := range unmarshalTests { diff --git a/src/pkg/json/encode.go b/src/pkg/json/encode.go index 8b2f99f8f0..e043a317e3 100644 --- a/src/pkg/json/encode.go +++ b/src/pkg/json/encode.go @@ -13,6 +13,7 @@ import ( "runtime" "sort" "strconv" + "utf8" ) // Marshal returns the JSON encoding of v. @@ -129,6 +130,14 @@ func (e *UnsupportedTypeError) String() string { return "json: unsupported type: " + e.Type.String() } +type InvalidUTF8Error struct { + S string +} + +func (e *InvalidUTF8Error) String() string { + return "json: invalid UTF-8 in string: " + strconv.Quote(e.S) +} + type MarshalerError struct { Type reflect.Type Error os.Error @@ -281,18 +290,36 @@ func (sv stringValues) get(i int) string { return sv[i].(*reflect.StringValue) func (e *encodeState) string(s string) { e.WriteByte('"') - for _, c := range s { - switch { - case c < 0x20: - e.WriteString(`\u00`) - e.WriteByte(hex[c>>4]) - e.WriteByte(hex[c&0xF]) - case c == '\\' || c == '"': - e.WriteByte('\\') - fallthrough - default: - e.WriteRune(c) + start := 0 + for i := 0; i < len(s); { + if b := s[i]; b < utf8.RuneSelf { + if 0x20 <= b && b != '\\' && b != '"' { + i++ + continue + } + if start < i { + e.WriteString(s[start:i]) + } + if b == '\\' || b == '"' { + e.WriteByte('\\') + e.WriteByte(b) + } else { + e.WriteString(`\u00`) + e.WriteByte(hex[b>>4]) + e.WriteByte(hex[b&0xF]) + } + i++ + start = i + continue + } + c, size := utf8.DecodeRuneInString(s[i:]) + if c == utf8.RuneError && size == 1 { + e.error(&InvalidUTF8Error{s}) } + i += size + } + if start < len(s) { + e.WriteString(s[start:]) } e.WriteByte('"') } -- 2.48.1