]> Cypherpunks repositories - gostls13.git/commitdiff
encoding/xml: improve marshaller sanity checks of directives
authorDidier Spezia <didier.06@gmail.com>
Sat, 27 Jun 2015 13:07:22 +0000 (13:07 +0000)
committerRuss Cox <rsc@golang.org>
Wed, 15 Jul 2015 01:56:14 +0000 (01:56 +0000)
When building a directive, the current sanity check prevents
a '>' to be used, which makes a DOCTYPE directive with an
internal subset be rejected. It is accepted by the parser
though, so what can be parsed cannot be encoded.

Improved the corresponding sanity check to mirror the behavior
of the parser (in the way it handles angle brackets, quotes,
and comments).

Fixes #10158

Change-Id: Ieffea9f870f2694548e12897f8f47babc0ea4414
Reviewed-on: https://go-review.googlesource.com/11630
Reviewed-by: Russ Cox <rsc@golang.org>
src/encoding/xml/marshal.go
src/encoding/xml/marshal_test.go

index 88e7d99cb5c3610b1809b771012afcefbcd019d4..5a49cc35285a75b638db88a95df0b20d4cb479bc 100644 (file)
@@ -173,6 +173,7 @@ func (enc *Encoder) EncodeElement(v interface{}, start StartElement) error {
 }
 
 var (
+       begComment   = []byte("<!--")
        endComment   = []byte("-->")
        endProcInst  = []byte("?>")
        endDirective = []byte(">")
@@ -238,8 +239,8 @@ func (enc *Encoder) EncodeToken(t Token) error {
                }
                p.WriteString("?>")
        case Directive:
-               if bytes.Contains(t, endDirective) {
-                       return fmt.Errorf("xml: EncodeToken of Directive containing > marker")
+               if !isValidDirective(t) {
+                       return fmt.Errorf("xml: EncodeToken of Directive containing wrong < or > markers")
                }
                p.WriteString("<!")
                p.Write(t)
@@ -248,6 +249,46 @@ func (enc *Encoder) EncodeToken(t Token) error {
        return p.cachedWriteError()
 }
 
+// isValidDirective reports whether dir is a valid directive text,
+// meaning angle brackets are matched, ignoring comments and strings.
+func isValidDirective(dir Directive) bool {
+       var (
+               depth     int
+               inquote   uint8
+               incomment bool
+       )
+       for i, c := range dir {
+               switch {
+               case incomment:
+                       if c == '>' {
+                               if n := 1 + i - len(endComment); n >= 0 && bytes.Equal(dir[n:i+1], endComment) {
+                                       incomment = false
+                               }
+                       }
+                       // Just ignore anything in comment
+               case inquote != 0:
+                       if c == inquote {
+                               inquote = 0
+                       }
+                       // Just ignore anything within quotes
+               case c == '\'' || c == '"':
+                       inquote = c
+               case c == '<':
+                       if i+len(begComment) < len(dir) && bytes.Equal(dir[i:i+len(begComment)], begComment) {
+                               incomment = true
+                       } else {
+                               depth++
+                       }
+               case c == '>':
+                       if depth == 0 {
+                               return false
+                       }
+                       depth--
+               }
+       }
+       return depth == 0 && inquote == 0 && !incomment
+}
+
 // Flush flushes any buffered XML to the underlying writer.
 // See the EncodeToken documentation for details about when it is necessary.
 func (enc *Encoder) Flush() error {
index 4c478ddded94efb9975139e5676afa2a9cf60790..78fe841d76abce1edc759bb3a1f7c6a394bb4af2 100644 (file)
@@ -1527,12 +1527,18 @@ var encodeTokenTests = []struct {
                Directive("foo"),
        },
        want: `<!foo>`,
+}, {
+       desc: "more complex directive",
+       toks: []Token{
+               Directive("DOCTYPE doc [ <!ELEMENT doc '>'> <!-- com>ment --> ]"),
+       },
+       want: `<!DOCTYPE doc [ <!ELEMENT doc '>'> <!-- com>ment --> ]>`,
 }, {
        desc: "directive instruction with bad name",
        toks: []Token{
                Directive("foo>"),
        },
-       err: "xml: EncodeToken of Directive containing > marker",
+       err: "xml: EncodeToken of Directive containing wrong < or > markers",
 }, {
        desc: "end tag without start tag",
        toks: []Token{
@@ -1868,3 +1874,35 @@ func TestRace9796(t *testing.T) {
        }
        wg.Wait()
 }
+
+func TestIsValidDirective(t *testing.T) {
+       testOK := []string{
+               "<>",
+               "< < > >",
+               "<!DOCTYPE '<' '>' '>' <!--nothing-->>",
+               "<!DOCTYPE doc [ <!ELEMENT doc ANY> <!ELEMENT doc ANY> ]>",
+               "<!DOCTYPE doc [ <!ELEMENT doc \"ANY> '<' <!E\" LEMENT '>' doc ANY> ]>",
+               "<!DOCTYPE doc <!-- just>>>> a < comment --> [ <!ITEM anything> ] >",
+       }
+       testKO := []string{
+               "<",
+               ">",
+               "<!--",
+               "-->",
+               "< > > < < >",
+               "<!dummy <!-- > -->",
+               "<!DOCTYPE doc '>",
+               "<!DOCTYPE doc '>'",
+               "<!DOCTYPE doc <!--comment>",
+       }
+       for _, s := range testOK {
+               if !isValidDirective(Directive(s)) {
+                       t.Errorf("Directive %q is expected to be valid", s)
+               }
+       }
+       for _, s := range testKO {
+               if isValidDirective(Directive(s)) {
+                       t.Errorf("Directive %q is expected to be invalid", s)
+               }
+       }
+}