[release-branch.go1.14] encoding/json: revert "avoid work when unquoting strings...

author Daniel Martí <mvdan@mvdan.cc>

Sun, 14 Jun 2020 21:09:18 +0000 (22:09 +0100)

committer Dmitri Shuralyov <dmitshur@golang.org>

Thu, 9 Jul 2020 17:46:51 +0000 (17:46 +0000)
author Daniel Martí <mvdan@mvdan.cc>
Sun, 14 Jun 2020 21:09:18 +0000 (22:09 +0100)
committer Dmitri Shuralyov <dmitshur@golang.org>
Thu, 9 Jul 2020 17:46:51 +0000 (17:46 +0000)
diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go

index b60e2bb0b2c4dbee63f3a4d7e59c7cb03550abe3..86d8a69db7e6d76aa40ca9d4b307ea2f43e909b7 100644 (file)
--- a/src/encoding/json/decode.go
+++ b/src/encoding/json/decode.go
@@ -213,9 +213,6 @@ type decodeState struct {
         savedError            error
         useNumber             bool
         disallowUnknownFields bool
-       // safeUnquote is the number of current string literal bytes that don't
-       // need to be unquoted. When negative, no bytes need unquoting.
-       safeUnquote int
  }
  
  // readIndex returns the position of the last byte read.
@@ -317,27 +314,13 @@ func (d *decodeState) rescanLiteral() {
  Switch:
         switch data[i-1] {
         case '"': // string
-               // safeUnquote is initialized at -1, which means that all bytes
-               // checked so far can be unquoted at a later time with no work
-               // at all. When reaching the closing '"', if safeUnquote is
-               // still -1, all bytes can be unquoted with no work. Otherwise,
-               // only those bytes up until the first '\\' or non-ascii rune
-               // can be safely unquoted.
-               safeUnquote := -1
                 for ; i < len(data); i++ {
-                       if c := data[i]; c == '\\' {
-                               if safeUnquote < 0 { // first unsafe byte
-                                       safeUnquote = int(i - d.off)
-                               }
+                       switch data[i] {
+                       case '\\':
                                 i++ // escaped char
-                       } else if c == '"' {
-                               d.safeUnquote = safeUnquote
+                       case '"':
                                 i++ // tokenize the closing quote too
                                 break Switch
-                       } else if c >= utf8.RuneSelf {
-                               if safeUnquote < 0 { // first unsafe byte
-                                       safeUnquote = int(i - d.off)
-                               }
                         }
                 }
         case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-': // number
@@ -691,7 +674,7 @@ func (d *decodeState) object(v reflect.Value) error {
                 start := d.readIndex()
                 d.rescanLiteral()
                 item := d.data[start:d.readIndex()]
-               key, ok := d.unquoteBytes(item)
+               key, ok := unquoteBytes(item)
                 if !ok {
                         panic(phasePanicMsg)
                 }
@@ -892,7 +875,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
                         d.saveError(&UnmarshalTypeError{Value: val, Type: v.Type(), Offset: int64(d.readIndex())})
                         return nil
                 }
-               s, ok := d.unquoteBytes(item)
+               s, ok := unquoteBytes(item)
                 if !ok {
                         if fromQuoted {
                                 return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
@@ -943,7 +926,7 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
                 }
  
         case '"': // string
-               s, ok := d.unquoteBytes(item)
+               s, ok := unquoteBytes(item)
                 if !ok {
                         if fromQuoted {
                                 return fmt.Errorf("json: invalid use of ,string struct tag, trying to unmarshal %q into %v", item, v.Type())
@@ -1103,7 +1086,7 @@ func (d *decodeState) objectInterface() map[string]interface{} {
                 start := d.readIndex()
                 d.rescanLiteral()
                 item := d.data[start:d.readIndex()]
-               key, ok := d.unquote(item)
+               key, ok := unquote(item)
                 if !ok {
                         panic(phasePanicMsg)
                 }
@@ -1152,7 +1135,7 @@ func (d *decodeState) literalInterface() interface{} {
                 return c == 't'
  
         case '"': // string
-               s, ok := d.unquote(item)
+               s, ok := unquote(item)
                 if !ok {
                         panic(phasePanicMsg)
                 }
@@ -1195,33 +1178,40 @@ func getu4(s []byte) rune {
  
  // unquote converts a quoted JSON string literal s into an actual string t.
  // The rules are different than for Go, so cannot use strconv.Unquote.
-// The first byte in s must be '"'.
-func (d *decodeState) unquote(s []byte) (t string, ok bool) {
-       s, ok = d.unquoteBytes(s)
+func unquote(s []byte) (t string, ok bool) {
+       s, ok = unquoteBytes(s)
         t = string(s)
         return
  }
  
-func (d *decodeState) unquoteBytes(s []byte) (t []byte, ok bool) {
-       // We already know that s[0] == '"'. However, we don't know that the
-       // closing quote exists in all cases, such as when the string is nested
-       // via the ",string" option.
-       if len(s) < 2 || s[len(s)-1] != '"' {
+func unquoteBytes(s []byte) (t []byte, ok bool) {
+       if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
                 return
         }
         s = s[1 : len(s)-1]
  
-       // If there are no unusual characters, no unquoting is needed, so return
-       // a slice of the original bytes.
-       r := d.safeUnquote
-       if r == -1 {
+       // Check for unusual characters. If there are none,
+       // then no unquoting is needed, so return a slice of the
+       // original bytes.
+       r := 0
+       for r < len(s) {
+               c := s[r]
+               if c == '\\' || c == '"' || c < ' ' {
+                       break
+               }
+               if c < utf8.RuneSelf {
+                       r++
+                       continue
+               }
+               rr, size := utf8.DecodeRune(s[r:])
+               if rr == utf8.RuneError && size == 1 {
+                       break
+               }
+               r += size
+       }
+       if r == len(s) {
                 return s, true
         }
-       // Only perform up to one safe unquote for each re-scanned string
-       // literal. In some edge cases, the decoder unquotes a literal a second
-       // time, even after another literal has been re-scanned. Thus, only the
-       // first unquote can safely use safeUnquote.
-       d.safeUnquote = 0
  
         b := make([]byte, len(s)+2*utf8.UTFMax)
         w := copy(b, s[0:r])
diff --git a/src/encoding/json/decode_test.go b/src/encoding/json/decode_test.go

index a49181e9823d378e1eac372281722981678e3ad7..689cc34c24361ca97b9ca6f4325518040125bcf5 100644 (file)
--- a/src/encoding/json/decode_test.go
+++ b/src/encoding/json/decode_test.go
@@ -2459,4 +2459,20 @@ func TestUnmarshalRescanLiteralMangledUnquote(t *testing.T) {
         if t1 != t2 {
                 t.Errorf("Marshal and Unmarshal roundtrip mismatch: want %q got %q", t1, t2)
         }
+
+       // See golang.org/issues/39555.
+       input := map[textUnmarshalerString]string{"FOO": "", `"`: ""}
+
+       encoded, err := Marshal(input)
+       if err != nil {
+               t.Fatalf("Marshal unexpected error: %v", err)
+       }
+       var got map[textUnmarshalerString]string
+       if err := Unmarshal(encoded, &got); err != nil {
+               t.Fatalf("Unmarshal unexpected error: %v", err)
+       }
+       want := map[textUnmarshalerString]string{"foo": "", `"`: ""}
+       if !reflect.DeepEqual(want, got) {
+               t.Fatalf("Unexpected roundtrip result:\nwant: %q\ngot:  %q", want, got)
+       }
  }
author	Daniel Martí <mvdan@mvdan.cc>
	Sun, 14 Jun 2020 21:09:18 +0000 (22:09 +0100)
committer	Dmitri Shuralyov <dmitshur@golang.org>
	Thu, 9 Jul 2020 17:46:51 +0000 (17:46 +0000)
src/encoding/json/decode.go		patch \| blob \| history
src/encoding/json/decode_test.go		patch \| blob \| history