]> Cypherpunks repositories - gostls13.git/commitdiff
encoding/xml: replace comments inside directives with a space
authorFilippo Valsorda <filippo@golang.org>
Mon, 26 Oct 2020 23:21:30 +0000 (00:21 +0100)
committerFilippo Valsorda <filippo@golang.org>
Mon, 15 Mar 2021 20:04:23 +0000 (20:04 +0000)
A Directive (like <!ENTITY xxx []>) can't have other nodes nested inside
it (in our data structure representation), so there is no way to
preserve comments. The previous behavior was to just elide them, which
however might change the semantic meaning of the surrounding markup.
Instead, replace them with a space which hopefully has the same semantic
effect of the comment.

Directives are not actually a node type in the XML spec, which instead
specifies each of them separately (<!ENTITY, <!DOCTYPE, etc.), each with
its own grammar. The rules for where and when the comments are allowed
are not straightforward, and can't be implemented without implementing
custom logic for each of the directives.

Simply preserving the comments in the body of the directive would be
problematic, as there can be unmatched quotes inside the comment.
Whether those quotes are considered meaningful semantically or not,
other parsers might disagree and interpret the output differently.

This issue was reported by Juho Nurminen of Mattermost as it leads to
round-trip mismatches. See #43168. It's not being fixed in a security
release because round-trip stability is not a currently supported
security property of encoding/xml, and we don't believe these fixes
would be sufficient to reliably guarantee it in the future.

Fixes CVE-2020-29510
Updates #43168

Change-Id: Icd86c75beff3e1e0689543efebdad10ed5178ce3
Reviewed-on: https://go-review.googlesource.com/c/go/+/277893
Run-TryBot: Filippo Valsorda <filippo@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Filippo Valsorda <filippo@golang.org>
Reviewed-by: Katie Hockman <katie@golang.org>
src/encoding/xml/xml.go
src/encoding/xml/xml_test.go

index c902f1295aafea07166699a14006e89ba54b473d..c14954df155a64a0ff3ef1126f22376538f4f78a 100644 (file)
@@ -768,6 +768,12 @@ func (d *Decoder) rawToken() (Token, error) {
                                        }
                                        b0, b1 = b1, b
                                }
+
+                               // Replace the comment with a space in the returned Directive
+                               // body, so that markup parts that were separated by the comment
+                               // (like a "<" and a "!") don't get joined when re-encoding the
+                               // Directive, taking new semantic meaning.
+                               d.buf.WriteByte(' ')
                        }
                }
                return Directive(d.buf.Bytes()), nil
index 47d0c391670df15b25be19624a17108417320886..19152dbdb689511a4f3ec28eab5ee21172438472 100644 (file)
@@ -802,11 +802,11 @@ var directivesWithCommentsInput = `
 
 var directivesWithCommentsTokens = []Token{
        CharData("\n"),
-       Directive(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
+       Directive(`DOCTYPE [ <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`),
        CharData("\n"),
-       Directive(`DOCTYPE [<!ENTITY go "Golang">]`),
+       Directive(`DOCTYPE [<!ENTITY go "Golang"> ]`),
        CharData("\n"),
-       Directive(`DOCTYPE <!-> <!>    [<!ENTITY go "Golang">]`),
+       Directive(`DOCTYPE <!-> <!>       [<!ENTITY go "Golang"> ]`),
        CharData("\n"),
 }
 
@@ -1051,9 +1051,10 @@ func testRoundTrip(t *testing.T, input string) {
 
 func TestRoundTrip(t *testing.T) {
        tests := map[string]string{
-               "leading colon":  `<::Test ::foo="bar"><:::Hello></:::Hello><Hello></Hello></::Test>`,
-               "trailing colon": `<foo abc:="x"></foo>`,
-               "double colon":   `<x:y:foo></x:y:foo>`,
+               "leading colon":          `<::Test ::foo="bar"><:::Hello></:::Hello><Hello></Hello></::Test>`,
+               "trailing colon":         `<foo abc:="x"></foo>`,
+               "double colon":           `<x:y:foo></x:y:foo>`,
+               "comments in directives": `<!ENTITY x<!<!-- c1 [ " -->--x --> > <e></e> <!DOCTYPE xxx [ x<!-- c2 " -->--x ]>`,
        }
        for name, input := range tests {
                t.Run(name, func(t *testing.T) { testRoundTrip(t, input) })