From a0a99cb22b2045b15509d1002a655db407a44a50 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Wed, 9 Jul 2025 16:55:14 -0700 Subject: [PATCH] encoding/json/v2: report wrapped io.ErrUnexpectedEOF In the event that the input is just JSON whitespace, the underlying jsontext.Decoder treats this as an empty stream and reports io.EOF. The logic in unmarshalFull simply converted io.EOF to io.ErrUnexpectedEOF, which is inconsistent with how all other io.ErrUnexpectedEOF errors are reported: they are wrapped within a jsontext.SyntacticError. Do the same thing for consistency. We add a v1 test (without goexperiment.jsonv2) to verify that the behavior is identical to how v1 has always behaved. We add a v1in2 test (with goexperiment.jsonv2) to verify that the v1in2 behavior correctly replicates historical v1 behavior. We also fix a faulty check in v1 Decoder.Decode, where it tried to detect errUnexpectedEnd and return an unwrapped io.ErrUnexpectedEOF error. Returning the unwrapped error is exactly what v1 has always done in streaming Decoder.Decode (but not in non-streaming Unmarshal). There is a prior bug reported in #25956 about this inconsistency, but we aim to preserve historical v1 behavior to reduce the probability of churn when v1 is re-implemented in terms of v2. 
Fixes #74548 Change-Id: Ibca52c3699ff3c09141e081c85f853781a86ec8e Reviewed-on: https://go-review.googlesource.com/c/go/+/687115 Auto-Submit: Joseph Tsai Reviewed-by: Carlos Amedee LUCI-TryBot-Result: Go LUCI Reviewed-by: Damien Neil --- src/encoding/json/decode_test.go | 15 +++++++++++++-- src/encoding/json/v2/arshal.go | 3 ++- src/encoding/json/v2/arshal_test.go | 8 +++++++- src/encoding/json/v2_decode_test.go | 15 +++++++++++++-- src/encoding/json/v2_scanner.go | 4 ++++ src/encoding/json/v2_stream.go | 2 +- 6 files changed, 40 insertions(+), 7 deletions(-) diff --git a/src/encoding/json/decode_test.go b/src/encoding/json/decode_test.go index 473fd02833..0df31c82c8 100644 --- a/src/encoding/json/decode_test.go +++ b/src/encoding/json/decode_test.go @@ -12,6 +12,7 @@ import ( "errors" "fmt" "image" + "io" "maps" "math" "math/big" @@ -469,11 +470,13 @@ var unmarshalTests = []struct { {CaseName: Name(""), in: `{"alphabet": "xyz"}`, ptr: new(U), err: fmt.Errorf("json: unknown field \"alphabet\""), disallowUnknownFields: true}, // syntax errors + {CaseName: Name(""), in: ``, ptr: new(any), err: &SyntaxError{"unexpected end of JSON input", 0}}, + {CaseName: Name(""), in: " \n\r\t", ptr: new(any), err: &SyntaxError{"unexpected end of JSON input", 4}}, + {CaseName: Name(""), in: `[2, 3`, ptr: new(any), err: &SyntaxError{"unexpected end of JSON input", 5}}, {CaseName: Name(""), in: `{"X": "foo", "Y"}`, err: &SyntaxError{"invalid character '}' after object key", 17}}, {CaseName: Name(""), in: `[1, 2, 3+]`, err: &SyntaxError{"invalid character '+' after array element", 9}}, {CaseName: Name(""), in: `{"X":12x}`, err: &SyntaxError{"invalid character 'x' after object key:value pair", 8}, useNumber: true}, - {CaseName: Name(""), in: `[2, 3`, err: &SyntaxError{msg: "unexpected end of JSON input", Offset: 5}}, - {CaseName: Name(""), in: `{"F3": -}`, ptr: new(V), err: &SyntaxError{msg: "invalid character '}' in numeric literal", Offset: 9}}, + {CaseName: Name(""), in: `{"F3": -}`, 
ptr: new(V), err: &SyntaxError{"invalid character '}' in numeric literal", 9}}, // raw value errors {CaseName: Name(""), in: "\x01 42", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, @@ -1377,6 +1380,14 @@ func TestUnmarshal(t *testing.T) { if tt.disallowUnknownFields { dec.DisallowUnknownFields() } + if tt.err != nil && strings.Contains(tt.err.Error(), "unexpected end of JSON input") { + // In streaming mode, we expect EOF or ErrUnexpectedEOF instead. + if strings.TrimSpace(tt.in) == "" { + tt.err = io.EOF + } else { + tt.err = io.ErrUnexpectedEOF + } + } if err := dec.Decode(v.Interface()); !equalError(err, tt.err) { t.Fatalf("%s: Decode error:\n\tgot: %v\n\twant: %v\n\n\tgot: %#v\n\twant: %#v", tt.Where, err, tt.err, err, tt.err) } else if err != nil && tt.out == nil { diff --git a/src/encoding/json/v2/arshal.go b/src/encoding/json/v2/arshal.go index 10b16efe4a..5cd2106be9 100644 --- a/src/encoding/json/v2/arshal.go +++ b/src/encoding/json/v2/arshal.go @@ -438,7 +438,8 @@ func unmarshalFull(in *jsontext.Decoder, out any, uo *jsonopts.Struct) error { case nil: return export.Decoder(in).CheckEOF() case io.EOF: - return io.ErrUnexpectedEOF + offset := in.InputOffset() + int64(len(in.UnreadBuffer())) + return &jsontext.SyntacticError{ByteOffset: offset, Err: io.ErrUnexpectedEOF} default: return err } diff --git a/src/encoding/json/v2/arshal_test.go b/src/encoding/json/v2/arshal_test.go index 8494deed03..879a2f3e0d 100644 --- a/src/encoding/json/v2/arshal_test.go +++ b/src/encoding/json/v2/arshal_test.go @@ -7138,7 +7138,13 @@ func TestUnmarshal(t *testing.T) { inBuf: ``, inVal: addr(structAll{}), want: addr(structAll{}), - wantErr: io.ErrUnexpectedEOF, + wantErr: &jsontext.SyntacticError{Err: io.ErrUnexpectedEOF}, + }, { + name: jsontest.Name("Structs/Invalid/ErrUnexpectedEOF"), + inBuf: " \n\r\t", + inVal: addr(structAll{}), + want: addr(structAll{}), + wantErr: &jsontext.SyntacticError{Err: io.ErrUnexpectedEOF, ByteOffset: 
len64(" \n\r\t")}, }, { name: jsontest.Name("Structs/Invalid/NestedErrUnexpectedEOF"), inBuf: `{"Pointer":`, diff --git a/src/encoding/json/v2_decode_test.go b/src/encoding/json/v2_decode_test.go index 3ab20e2b5d..cfcefbfcdc 100644 --- a/src/encoding/json/v2_decode_test.go +++ b/src/encoding/json/v2_decode_test.go @@ -12,6 +12,7 @@ import ( "errors" "fmt" "image" + "io" "maps" "math" "math/big" @@ -473,11 +474,13 @@ var unmarshalTests = []struct { {CaseName: Name(""), in: `{"alphabet": "xyz"}`, ptr: new(U), err: fmt.Errorf("json: unknown field \"alphabet\""), disallowUnknownFields: true}, // syntax errors + {CaseName: Name(""), in: ``, ptr: new(any), err: &SyntaxError{errUnexpectedEnd.Error(), 0}}, + {CaseName: Name(""), in: " \n\r\t", ptr: new(any), err: &SyntaxError{errUnexpectedEnd.Error(), len64(" \n\r\t")}}, + {CaseName: Name(""), in: `[2, 3`, ptr: new(any), err: &SyntaxError{errUnexpectedEnd.Error(), len64(`[2, 3`)}}, {CaseName: Name(""), in: `{"X": "foo", "Y"}`, err: &SyntaxError{"invalid character '}' after object key", len64(`{"X": "foo", "Y"`)}}, {CaseName: Name(""), in: `[1, 2, 3+]`, err: &SyntaxError{"invalid character '+' after array element", len64(`[1, 2, 3`)}}, {CaseName: Name(""), in: `{"X":12x}`, err: &SyntaxError{"invalid character 'x' after object key:value pair", len64(`{"X":12`)}, useNumber: true}, - {CaseName: Name(""), in: `[2, 3`, err: &SyntaxError{msg: "unexpected end of JSON input", Offset: len64(`[2, 3`)}}, - {CaseName: Name(""), in: `{"F3": -}`, ptr: new(V), err: &SyntaxError{msg: "invalid character '}' in numeric literal", Offset: len64(`{"F3": -`)}}, + {CaseName: Name(""), in: `{"F3": -}`, ptr: new(V), err: &SyntaxError{"invalid character '}' in numeric literal", len64(`{"F3": -`)}}, // raw value errors {CaseName: Name(""), in: "\x01 42", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", len64(``)}}, @@ -1382,6 +1385,14 @@ func TestUnmarshal(t *testing.T) { if tt.disallowUnknownFields { 
dec.DisallowUnknownFields() } + if tt.err != nil && strings.Contains(tt.err.Error(), errUnexpectedEnd.Error()) { + // In streaming mode, we expect EOF or ErrUnexpectedEOF instead. + if strings.TrimSpace(tt.in) == "" { + tt.err = io.EOF + } else { + tt.err = io.ErrUnexpectedEOF + } + } if err := dec.Decode(v.Interface()); !equalError(err, tt.err) { t.Fatalf("%s: Decode error:\n\tgot: %v\n\twant: %v\n\n\tgot: %#v\n\twant: %#v", tt.Where, err, tt.err, err, tt.err) } else if err != nil && tt.out == nil { diff --git a/src/encoding/json/v2_scanner.go b/src/encoding/json/v2_scanner.go index 475bf58b20..aef045f466 100644 --- a/src/encoding/json/v2_scanner.go +++ b/src/encoding/json/v2_scanner.go @@ -30,6 +30,10 @@ func checkValid(data []byte) error { xd := export.Decoder(d) xd.Struct.Flags.Set(jsonflags.AllowDuplicateNames | jsonflags.AllowInvalidUTF8 | 1) if _, err := d.ReadValue(); err != nil { + if err == io.EOF { + offset := d.InputOffset() + int64(len(d.UnreadBuffer())) + err = &jsontext.SyntacticError{ByteOffset: offset, Err: io.ErrUnexpectedEOF} + } return transformSyntacticError(err) } if err := xd.CheckEOF(); err != nil { diff --git a/src/encoding/json/v2_stream.go b/src/encoding/json/v2_stream.go index d58bafbfd0..ccbef6077b 100644 --- a/src/encoding/json/v2_stream.go +++ b/src/encoding/json/v2_stream.go @@ -68,7 +68,7 @@ func (dec *Decoder) Decode(v any) error { b, err := dec.dec.ReadValue() if err != nil { dec.err = transformSyntacticError(err) - if dec.err == errUnexpectedEnd { + if dec.err.Error() == errUnexpectedEnd.Error() { // NOTE: Decode has always been inconsistent with Unmarshal // with regard to the exact error value for truncated input. dec.err = io.ErrUnexpectedEOF -- 2.50.0