]> Cypherpunks repositories - gostls13.git/commitdiff
json: check for invalid UTF-8
authorRuss Cox <rsc@golang.org>
Mon, 13 Dec 2010 20:51:11 +0000 (15:51 -0500)
committerRuss Cox <rsc@golang.org>
Mon, 13 Dec 2010 20:51:11 +0000 (15:51 -0500)
Fixes #1250.

R=r
CC=golang-dev
https://golang.org/cl/3562042

src/pkg/json/decode_test.go
src/pkg/json/encode.go

index b805d3d82f7b610c3de990d002bec85c520e77ce..92c1cafa709a120366113f781fdcdc6806275e1b 100644 (file)
@@ -102,6 +102,20 @@ func TestMarshal(t *testing.T) {
        }
 }
 
+func TestMarshalBadUTF8(t *testing.T) {
+       s := "hello\xffworld"
+       b, err := Marshal(s)
+       if err == nil {
+               t.Fatal("Marshal bad UTF8: no error")
+       }
+       if len(b) != 0 {
+               t.Fatal("Marshal returned data")
+       }
+       if _, ok := err.(*InvalidUTF8Error); !ok {
+               t.Fatal("Marshal did not return InvalidUTF8Error: %T %v", err, err)
+       }
+}
+
 func TestUnmarshal(t *testing.T) {
        var scan scanner
        for i, tt := range unmarshalTests {
index 8b2f99f8f018b854b40d08f0d980eb6a5bc11138..e043a317e34e29c0a9e26844ae7e7dd5c36a83e6 100644 (file)
@@ -13,6 +13,7 @@ import (
        "runtime"
        "sort"
        "strconv"
+       "utf8"
 )
 
 // Marshal returns the JSON encoding of v.
@@ -129,6 +130,14 @@ func (e *UnsupportedTypeError) String() string {
        return "json: unsupported type: " + e.Type.String()
 }
 
+type InvalidUTF8Error struct {
+       S string
+}
+
+func (e *InvalidUTF8Error) String() string {
+       return "json: invalid UTF-8 in string: " + strconv.Quote(e.S)
+}
+
 type MarshalerError struct {
        Type  reflect.Type
        Error os.Error
@@ -281,18 +290,36 @@ func (sv stringValues) get(i int) string   { return sv[i].(*reflect.StringValue)
 
 func (e *encodeState) string(s string) {
        e.WriteByte('"')
-       for _, c := range s {
-               switch {
-               case c < 0x20:
-                       e.WriteString(`\u00`)
-                       e.WriteByte(hex[c>>4])
-                       e.WriteByte(hex[c&0xF])
-               case c == '\\' || c == '"':
-                       e.WriteByte('\\')
-                       fallthrough
-               default:
-                       e.WriteRune(c)
+       start := 0
+       for i := 0; i < len(s); {
+               if b := s[i]; b < utf8.RuneSelf {
+                       if 0x20 <= b && b != '\\' && b != '"' {
+                               i++
+                               continue
+                       }
+                       if start < i {
+                               e.WriteString(s[start:i])
+                       }
+                       if b == '\\' || b == '"' {
+                               e.WriteByte('\\')
+                               e.WriteByte(b)
+                       } else {
+                               e.WriteString(`\u00`)
+                               e.WriteByte(hex[b>>4])
+                               e.WriteByte(hex[b&0xF])
+                       }
+                       i++
+                       start = i
+                       continue
+               }
+               c, size := utf8.DecodeRuneInString(s[i:])
+               if c == utf8.RuneError && size == 1 {
+                       e.error(&InvalidUTF8Error{s})
                }
+               i += size
+       }
+       if start < len(s) {
+               e.WriteString(s[start:])
        }
        e.WriteByte('"')
 }