From: Axel Wagner Date: Wed, 14 Feb 2024 08:38:46 +0000 (+0100) Subject: encoding/xml: reject XML declaration after start of document X-Git-Tag: go1.23rc1~1152 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=8a0fbd75a54c27ff2ae624ac2775bf752cdbceb4;p=gostls13.git encoding/xml: reject XML declaration after start of document The XML specification requires an XML declaration, if present, to only appear at the very beginning of the document, not even preceded by whitespace. The parser currently accepts it at any part of the input. Rejecting whitespace at the beginning of the file might break too many users. This change instead only rejects an XML declaration preceded by a non-whitespace token *and* allows the Encoder to emit whitespace before an XML declaration. This means that a token stream produced by the Decoder can be passed to the Encoder without error, while we still don't emit clearly invalid XML. This might break programs depending on Decoder allowing arbitrary XML before the XML declaration. Fixes #65691. Change-Id: Ib1d4b3116aee63f40fd377f90595780b4befd1ee Reviewed-on: https://go-review.googlesource.com/c/go/+/564035 Auto-Submit: Ian Lance Taylor LUCI-TryBot-Result: Go LUCI Reviewed-by: Than McIntosh Reviewed-by: Ian Lance Taylor --- diff --git a/src/encoding/xml/marshal.go b/src/encoding/xml/marshal.go index 05b5542dfb..c30f0dca15 100644 --- a/src/encoding/xml/marshal.go +++ b/src/encoding/xml/marshal.go @@ -208,7 +208,6 @@ var ( // EncodeToken allows writing a [ProcInst] with Target set to "xml" only as the first token // in the stream. func (enc *Encoder) EncodeToken(t Token) error { - p := &enc.p switch t := t.(type) { case StartElement: @@ -228,11 +227,10 @@ func (enc *Encoder) EncodeToken(t Token) error { p.WriteString("") - return p.cachedWriteError() case ProcInst: // First token to be encoded which is also a ProcInst with target of xml // is the xml declaration. The only ProcInst where target of xml is allowed. - if t.Target == "xml" && p.w.Buffered() != 0 { + if t.Target == "xml" && p.wroteNonWS { return fmt.Errorf("xml: EncodeToken of ProcInst xml target only valid for xml declaration, first token encoded") } if !isNameString(t.Target) { @@ -257,9 +255,30 @@ func (enc *Encoder) EncodeToken(t Token) error { p.WriteString(">") default: return fmt.Errorf("xml: EncodeToken of invalid token type") + } + if err := p.cachedWriteError(); err != nil || enc.p.wroteNonWS { + return err + } + enc.p.wroteNonWS = !isWhitespace(t) + return nil +} +// isWhitespace reports whether t is a CharData token consisting entirely of +// XML whitespace. +func isWhitespace(t Token) bool { + switch t := t.(type) { + case CharData: + for _, b := range t { + switch b { + case ' ', '\r', '\n', '\t': + default: + return false + } + } + return true + default: + return false } - return p.cachedWriteError() } // isValidDirective reports whether dir is a valid directive text, @@ -329,6 +348,7 @@ type printer struct { prefixes []string tags []Name closed bool + wroteNonWS bool err error } diff --git a/src/encoding/xml/marshal_test.go b/src/encoding/xml/marshal_test.go index f6bcc7fd30..90922f549e 100644 --- a/src/encoding/xml/marshal_test.go +++ b/src/encoding/xml/marshal_test.go @@ -2356,6 +2356,9 @@ func TestProcInstEncodeToken(t *testing.T) { var buf bytes.Buffer enc := NewEncoder(&buf) + if err := enc.EncodeToken(CharData(" \n\r\t")); err != nil { + t.Fatal("enc.EncodeToken: expected to be able to encode whitespace as first token") + } if err := enc.EncodeToken(ProcInst{"xml", []byte("Instruction")}); err != nil { t.Fatalf("enc.EncodeToken: expected to be able to encode xml target ProcInst as first token, %s", err) } diff --git a/src/encoding/xml/xml.go b/src/encoding/xml/xml.go index 6b8f2e7978..a1e63ed30d 100644 --- a/src/encoding/xml/xml.go +++ b/src/encoding/xml/xml.go @@ -212,6 +212,7 @@ type Decoder struct { line int linestart int64 offset int64 + readNonWS bool unmarshalDepth int } @@ -559,6 +560,8 @@ func (d *Decoder) rawToken() (Token, error) { return EndElement{d.toClose}, nil } + readNonWS := d.readNonWS + b, ok := d.getc() if !ok { return nil, d.err @@ -571,8 +574,12 @@ func (d *Decoder) rawToken() (Token, error) { if data == nil { return nil, d.err } + if !d.readNonWS && !isWhitespace(CharData(data)) { + d.readNonWS = true + } return CharData(data), nil } + d.readNonWS = true if b, ok = d.mustgetc(); !ok { return nil, d.err @@ -623,6 +630,11 @@ func (d *Decoder) rawToken() (Token, error) { data = data[0 : len(data)-2] // chop ?> if target == "xml" { + if readNonWS { + d.err = errors.New("xml: XML declaration after start of document") + return nil, d.err + } + content := string(data) ver := procInst("version", content) if ver != "" && ver != "1.0" { diff --git a/src/encoding/xml/xml_test.go b/src/encoding/xml/xml_test.go index 4bec4e7f1e..2c985f7c70 100644 --- a/src/encoding/xml/xml_test.go +++ b/src/encoding/xml/xml_test.go @@ -1350,10 +1350,12 @@ func TestParseErrors(t *testing.T) { // Header-related errors. {``, `unsupported version "1.1"; only version 1.0 is supported`}, + {``, `XML declaration after start of document`}, // Cases below are for "no errors". {withDefaultHeader(``), ``}, {withDefaultHeader(``), ``}, + {` `, ``}, } for _, test := range tests {