]> Cypherpunks repositories - gostls13.git/commitdiff
encoding/xml: fix decoding of unknown entities in non-strict mode
authorGustavo Niemeyer <gustavo@niemeyer.net>
Thu, 17 May 2012 03:04:00 +0000 (00:04 -0300)
committerGustavo Niemeyer <gustavo@niemeyer.net>
Thu, 17 May 2012 03:04:00 +0000 (00:04 -0300)
Fixes #3447.

R=rsc, gustavo
CC=golang-dev
https://golang.org/cl/6039045

src/pkg/encoding/xml/xml.go
src/pkg/encoding/xml/xml_test.go

index 5066f5c01067f9bc69d324bc997644b5fd893e64..623f41780116472085350ad200c5ed47c2fe26cc 100644 (file)
@@ -850,6 +850,8 @@ Input:
                        // Parsers are required to recognize lt, gt, amp, apos, and quot
                        // even if they have not been declared.  That's all we allow.
                        var i int
+                       var semicolon bool
+                       var valid bool
                        for i = 0; i < len(d.tmp); i++ {
                                var ok bool
                                d.tmp[i], ok = d.getc()
@@ -861,6 +863,8 @@ Input:
                                }
                                c := d.tmp[i]
                                if c == ';' {
+                                       semicolon = true
+                                       valid = i > 0
                                        break
                                }
                                if 'a' <= c && c <= 'z' ||
@@ -873,14 +877,25 @@ Input:
                                break
                        }
                        s := string(d.tmp[0:i])
-                       if i >= len(d.tmp) {
+                       if !valid {
                                if !d.Strict {
                                        b0, b1 = 0, 0
                                        d.buf.WriteByte('&')
                                        d.buf.Write(d.tmp[0:i])
+                                       if semicolon {
+                                               d.buf.WriteByte(';')
+                                       }
                                        continue Input
                                }
-                               d.err = d.syntaxError("character entity expression &" + s + "... too long")
+                               semi := ";"
+                               if !semicolon {
+                                       semi = " (no semicolon)"
+                               }
+                               if i < len(d.tmp) {
+                                       d.err = d.syntaxError("invalid character entity &" + s + semi)
+                               } else {
+                                       d.err = d.syntaxError("invalid character entity &" + s + "... too long")
+                               }
                                return nil
                        }
                        var haveText bool
@@ -910,6 +925,7 @@ Input:
                                        b0, b1 = 0, 0
                                        d.buf.WriteByte('&')
                                        d.buf.Write(d.tmp[0:i])
+                                       d.buf.WriteByte(';')
                                        continue Input
                                }
                                d.err = d.syntaxError("invalid character entity &" + s + ";")
index 1d0696ce087b3446a7df588103f284f6eb1ed8b5..d556789fddeddf25adc65461ecf7cd1879d89ba9 100644 (file)
@@ -5,6 +5,7 @@
 package xml
 
 import (
+       "fmt"
        "io"
        "reflect"
        "strings"
@@ -158,6 +159,39 @@ func TestRawToken(t *testing.T) {
        testRawToken(t, d, rawTokens)
 }
 
+const nonStrictInput = `
+<tag>non&entity</tag>
+<tag>&unknown;entity</tag>
+<tag>&#123</tag>
+<tag>&#zzz;</tag>
+`
+
+var nonStrictTokens = []Token{
+       CharData("\n"),
+       StartElement{Name{"", "tag"}, []Attr{}},
+       CharData("non&entity"),
+       EndElement{Name{"", "tag"}},
+       CharData("\n"),
+       StartElement{Name{"", "tag"}, []Attr{}},
+       CharData("&unknown;entity"),
+       EndElement{Name{"", "tag"}},
+       CharData("\n"),
+       StartElement{Name{"", "tag"}, []Attr{}},
+       CharData("&#123"),
+       EndElement{Name{"", "tag"}},
+       CharData("\n"),
+       StartElement{Name{"", "tag"}, []Attr{}},
+       CharData("&#zzz;"),
+       EndElement{Name{"", "tag"}},
+       CharData("\n"),
+}
+
+func TestNonStrictRawToken(t *testing.T) {
+       d := NewDecoder(strings.NewReader(nonStrictInput))
+       d.Strict = false
+       testRawToken(t, d, nonStrictTokens)
+}
+
 type downCaser struct {
        t *testing.T
        r io.ByteReader
@@ -219,7 +253,18 @@ func testRawToken(t *testing.T, d *Decoder, rawTokens []Token) {
                        t.Fatalf("token %d: unexpected error: %s", i, err)
                }
                if !reflect.DeepEqual(have, want) {
-                       t.Errorf("token %d = %#v want %#v", i, have, want)
+                       var shave, swant string
+                       if _, ok := have.(CharData); ok {
+                               shave = fmt.Sprintf("CharData(%q)", have)
+                       } else {
+                               shave = fmt.Sprintf("%#v", have)
+                       }
+                       if _, ok := want.(CharData); ok {
+                               swant = fmt.Sprintf("CharData(%q)", want)
+                       } else {
+                               swant = fmt.Sprintf("%#v", want)
+                       }
+                       t.Errorf("token %d = %s, want %s", i, shave, swant)
                }
        }
 }
@@ -531,8 +576,8 @@ var characterTests = []struct {
        {"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
        {"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
        {"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
-       {"<doc>&\x01;</doc>", "invalid character entity &;"},
-       {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &;"},
+       {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
+       {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity & (no semicolon)"},
 }
 
 func TestDisallowedCharacters(t *testing.T) {