encoding/json: remove legacy option to EscapeInvalidUTF8

author Joe Tsai <joetsai@digital-static.net>
Thu, 10 Jul 2025 00:08:49 +0000 (17:08 -0700)
committer Gopher Robot <gobot@golang.org>
Fri, 11 Jul 2025 21:27:20 +0000 (14:27 -0700)
diff --git a/src/encoding/json/internal/jsonflags/flags.go b/src/encoding/json/internal/jsonflags/flags.go

index 4496359c895e17dc25c1648503cc8b4dd187aba8..1e8c2842d479cc260abed1332b62a2ea72be0121 100644 (file)
--- a/src/encoding/json/internal/jsonflags/flags.go
+++ b/src/encoding/json/internal/jsonflags/flags.go
@@ -52,7 +52,6 @@ const (
                 AllowInvalidUTF8 |
                 EscapeForHTML |
                 EscapeForJS |
-               EscapeInvalidUTF8 |
                 PreserveRawStrings |
                 Deterministic |
                 FormatNilMapAsNull |
@@ -77,7 +76,7 @@ const (
         WhitespaceFlags = AnyWhitespace | Indent | IndentPrefix
  
         // AnyEscape is the set of flags related to escaping in a JSON string.
-       AnyEscape = EscapeForHTML | EscapeForJS | EscapeInvalidUTF8
+       AnyEscape = EscapeForHTML | EscapeForJS
  
         // CanonicalizeNumbers is the set of flags related to raw number canonicalization.
         CanonicalizeNumbers = CanonicalizeRawInts | CanonicalizeRawFloats
@@ -97,7 +96,6 @@ const (
         ReorderRawObjects     // encode only
         EscapeForHTML         // encode only
         EscapeForJS           // encode only
-       EscapeInvalidUTF8     // encode only; only exposed in v1
         Multiline             // encode only
         SpaceAfterColon       // encode only
         SpaceAfterComma       // encode only
diff --git a/src/encoding/json/internal/jsonwire/encode.go b/src/encoding/json/internal/jsonwire/encode.go

index 3901ff8bed6417c405e3520d3aba2f17a3129df2..8f9b8ab09e64c4bc84ed17535781cafa8bcc49ea 100644 (file)
--- a/src/encoding/json/internal/jsonwire/encode.go
+++ b/src/encoding/json/internal/jsonwire/encode.go
@@ -92,11 +92,7 @@ func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes, flags *jsonflag
                         case isInvalidUTF8(r, rn):
                                 hasInvalidUTF8 = true
                                 dst = append(dst, src[i:n-rn]...)
-                               if flags.Get(jsonflags.EscapeInvalidUTF8) {
-                                       dst = append(dst, `\ufffd`...)
-                               } else {
-                                       dst = append(dst, "\ufffd"...)
-                               }
+                               dst = append(dst, "\ufffd"...)
                                 i = n
                         case (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS):
                                 dst = append(dst, src[i:n-rn]...)
diff --git a/src/encoding/json/v2_decode_test.go b/src/encoding/json/v2_decode_test.go

index cfcefbfcdc32aaff4dd806d5ce4d51362dec02c6..1e4914efc4aeab7ef2243d87a1501429718ec287 100644 (file)
--- a/src/encoding/json/v2_decode_test.go
+++ b/src/encoding/json/v2_decode_test.go
@@ -1249,12 +1249,12 @@ func TestMarshalInvalidUTF8(t *testing.T) {
                 in   string
                 want string
         }{
-               {Name(""), "hello\xffworld", `"hello\ufffdworld"`},
+               {Name(""), "hello\xffworld", "\"hello\ufffdworld\""},
                 {Name(""), "", `""`},
-               {Name(""), "\xff", `"\ufffd"`},
-               {Name(""), "\xff\xff", `"\ufffd\ufffd"`},
-               {Name(""), "a\xffb", `"a\ufffdb"`},
-               {Name(""), "\xe6\x97\xa5\xe6\x9c\xac\xff\xaa\x9e", `"日本\ufffd\ufffd\ufffd"`},
+               {Name(""), "\xff", "\"\ufffd\""},
+               {Name(""), "\xff\xff", "\"\ufffd\ufffd\""},
+               {Name(""), "a\xffb", "\"a\ufffdb\""},
+               {Name(""), "\xe6\x97\xa5\xe6\x9c\xac\xff\xaa\x9e", "\"日本\ufffd\ufffd\ufffd\""},
         }
         for _, tt := range tests {
                 t.Run(tt.Name, func(t *testing.T) {
diff --git a/src/encoding/json/v2_diff_test.go b/src/encoding/json/v2_diff_test.go

index 7a561732f4a187b4564a415a5a149e5a46f79e60..9d0798ed1da46255d4538410608e678fd8aa57c6 100644 (file)
--- a/src/encoding/json/v2_diff_test.go
+++ b/src/encoding/json/v2_diff_test.go
@@ -786,8 +786,8 @@ func TestInvalidUTF8(t *testing.T) {
                         switch {
                         case json.Version == "v1" && err != nil:
                                 t.Fatalf("json.Marshal error: %v", err)
-                       case json.Version == "v1" && string(got) != `"\ufffd"`:
-                               t.Fatalf(`json.Marshal = %s, want "\ufffd"`, got)
+                       case json.Version == "v1" && string(got) != "\"\ufffd\"":
+                               t.Fatalf(`json.Marshal = %s, want %q`, got, "\ufffd")
                         case json.Version == "v2" && err == nil:
                                 t.Fatal("json.Marshal error is nil, want non-nil")
                         }
diff --git a/src/encoding/json/v2_options.go b/src/encoding/json/v2_options.go

index 4006d764ccfba0d81a91e2a980fc85908a915a8a..66bd01eb3c262ce8310d44a745c38159e9aecfdb 100644 (file)
--- a/src/encoding/json/v2_options.go
+++ b/src/encoding/json/v2_options.go
@@ -204,7 +204,6 @@ type Options = jsonopts.Options
  // It is equivalent to the following boolean options being set to true:
  //
  //   - [CallMethodsWithLegacySemantics]
-//   - [EscapeInvalidUTF8]
  //   - [FormatBytesWithLegacySemantics]
  //   - [FormatTimeWithLegacySemantics]
  //   - [MatchCaseSensitiveDelimiter]
@@ -279,23 +278,6 @@ func CallMethodsWithLegacySemantics(v bool) Options {
         }
  }
  
-// EscapeInvalidUTF8 specifies that when encoding a [jsontext.String]
-// with bytes of invalid UTF-8, such bytes are escaped as
-// a hexadecimal Unicode codepoint (i.e., \ufffd).
-// In contrast, the v2 default is to use the minimal representation,
-// which is to encode invalid UTF-8 as the Unicode replacement rune itself
-// (without any form of escaping).
-//
-// This only affects encoding and is ignored when decoding.
-// The v1 default is true.
-func EscapeInvalidUTF8(v bool) Options {
-       if v {
-               return jsonflags.EscapeInvalidUTF8 | 1
-       } else {
-               return jsonflags.EscapeInvalidUTF8 | 0
-       }
-}
-
  // FormatBytesWithLegacySemantics specifies that handling of
  // []~byte and [N]~byte types follow legacy semantics:
  //
author	Joe Tsai <joetsai@digital-static.net>
	Thu, 10 Jul 2025 00:08:49 +0000 (17:08 -0700)
committer	Gopher Robot <gobot@golang.org>
	Fri, 11 Jul 2025 21:27:20 +0000 (14:27 -0700)
src/encoding/json/internal/jsonflags/flags.go		patch \| blob \| history
src/encoding/json/internal/jsonwire/encode.go		patch \| blob \| history
src/encoding/json/v2_decode_test.go		patch \| blob \| history
src/encoding/json/v2_diff_test.go		patch \| blob \| history
src/encoding/json/v2_options.go		patch \| blob \| history