From: Gustavo Niemeyer Date: Thu, 17 May 2012 03:04:00 +0000 (-0300) Subject: encoding/xml: fix decoding of unknown entities in non-strict mode X-Git-Tag: go1.1rc2~3189 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=fdc45367f97b9cea81b3f8c045b426d6e4a11766;p=gostls13.git encoding/xml: fix decoding of unknown entities in non-strict mode Fixes #3447. R=rsc, gustavo CC=golang-dev https://golang.org/cl/6039045 --- diff --git a/src/pkg/encoding/xml/xml.go b/src/pkg/encoding/xml/xml.go index 5066f5c010..623f417801 100644 --- a/src/pkg/encoding/xml/xml.go +++ b/src/pkg/encoding/xml/xml.go @@ -850,6 +850,8 @@ Input: // Parsers are required to recognize lt, gt, amp, apos, and quot // even if they have not been declared. That's all we allow. var i int + var semicolon bool + var valid bool for i = 0; i < len(d.tmp); i++ { var ok bool d.tmp[i], ok = d.getc() @@ -861,6 +863,8 @@ Input: } c := d.tmp[i] if c == ';' { + semicolon = true + valid = i > 0 break } if 'a' <= c && c <= 'z' || @@ -873,14 +877,25 @@ Input: break } s := string(d.tmp[0:i]) - if i >= len(d.tmp) { + if !valid { if !d.Strict { b0, b1 = 0, 0 d.buf.WriteByte('&') d.buf.Write(d.tmp[0:i]) + if semicolon { + d.buf.WriteByte(';') + } continue Input } - d.err = d.syntaxError("character entity expression &" + s + "... too long") + semi := ";" + if !semicolon { + semi = " (no semicolon)" + } + if i < len(d.tmp) { + d.err = d.syntaxError("invalid character entity &" + s + semi) + } else { + d.err = d.syntaxError("invalid character entity &" + s + "... too long") + } return nil } var haveText bool @@ -910,6 +925,7 @@ Input: b0, b1 = 0, 0 d.buf.WriteByte('&') d.buf.Write(d.tmp[0:i]) + d.buf.WriteByte(';') continue Input } d.err = d.syntaxError("invalid character entity &" + s + ";") diff --git a/src/pkg/encoding/xml/xml_test.go b/src/pkg/encoding/xml/xml_test.go index 1d0696ce08..d556789fdd 100644 --- a/src/pkg/encoding/xml/xml_test.go +++ b/src/pkg/encoding/xml/xml_test.go @@ -5,6 +5,7 @@ package xml import ( + "fmt" "io" "reflect" "strings" @@ -158,6 +159,39 @@ func TestRawToken(t *testing.T) { testRawToken(t, d, rawTokens) } +const nonStrictInput = ` +non&entity +&unknown;entity +{ +&#zzz; +` + +var nonStrictTokens = []Token{ + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("non&entity"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&unknown;entity"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("{"), + EndElement{Name{"", "tag"}}, + CharData("\n"), + StartElement{Name{"", "tag"}, []Attr{}}, + CharData("&#zzz;"), + EndElement{Name{"", "tag"}}, + CharData("\n"), +} + +func TestNonStrictRawToken(t *testing.T) { + d := NewDecoder(strings.NewReader(nonStrictInput)) + d.Strict = false + testRawToken(t, d, nonStrictTokens) +} + type downCaser struct { t *testing.T r io.ByteReader @@ -219,7 +253,18 @@ func testRawToken(t *testing.T, d *Decoder, rawTokens []Token) { t.Fatalf("token %d: unexpected error: %s", i, err) } if !reflect.DeepEqual(have, want) { - t.Errorf("token %d = %#v want %#v", i, have, want) + var shave, swant string + if _, ok := have.(CharData); ok { + shave = fmt.Sprintf("CharData(%q)", have) + } else { + shave = fmt.Sprintf("%#v", have) + } + if _, ok := want.(CharData); ok { + swant = fmt.Sprintf("CharData(%q)", want) + } else { + swant = fmt.Sprintf("%#v", want) + } + t.Errorf("token %d = %s, want %s", i, shave, swant) } } } @@ -531,8 +576,8 @@ var characterTests = []struct { {"\xef\xbf\xbe", "illegal character code U+FFFE"}, {"\r\n\x07", "illegal character code U+0007"}, {"what's up", "expected attribute name in element"}, - {"&\x01;", "invalid character entity &;"}, - {"&\xef\xbf\xbe;", "invalid character entity &;"}, + {"&\x01;", "invalid character entity & (no semicolon)"}, + {"&\xef\xbf\xbe;", "invalid character entity & (no semicolon)"}, } func TestDisallowedCharacters(t *testing.T) {