// Parsers are required to recognize lt, gt, amp, apos, and quot
// even if they have not been declared. That's all we allow.
var i int
+ var semicolon bool
+ var valid bool
for i = 0; i < len(d.tmp); i++ {
var ok bool
d.tmp[i], ok = d.getc()
}
c := d.tmp[i]
if c == ';' {
+ semicolon = true
+ valid = i > 0
break
}
if 'a' <= c && c <= 'z' ||
break
}
s := string(d.tmp[0:i])
- if i >= len(d.tmp) {
+ if !valid {
if !d.Strict {
b0, b1 = 0, 0
d.buf.WriteByte('&')
d.buf.Write(d.tmp[0:i])
+ if semicolon {
+ d.buf.WriteByte(';')
+ }
continue Input
}
- d.err = d.syntaxError("character entity expression &" + s + "... too long")
+ semi := ";"
+ if !semicolon {
+ semi = " (no semicolon)"
+ }
+ if i < len(d.tmp) {
+ d.err = d.syntaxError("invalid character entity &" + s + semi)
+ } else {
+ d.err = d.syntaxError("invalid character entity &" + s + "... too long")
+ }
return nil
}
var haveText bool
b0, b1 = 0, 0
d.buf.WriteByte('&')
d.buf.Write(d.tmp[0:i])
+ d.buf.WriteByte(';')
continue Input
}
d.err = d.syntaxError("invalid character entity &" + s + ";")
package xml
import (
+ "fmt"
"io"
"reflect"
"strings"
testRawToken(t, d, rawTokens)
}
+const nonStrictInput = `
+<tag>non&entity</tag>
+<tag>&unknown;entity</tag>
+<tag>{</tag>
+<tag>&#zzz;</tag>
+`
+
+var nonStrictTokens = []Token{
+ CharData("\n"),
+ StartElement{Name{"", "tag"}, []Attr{}},
+ CharData("non&entity"),
+ EndElement{Name{"", "tag"}},
+ CharData("\n"),
+ StartElement{Name{"", "tag"}, []Attr{}},
+ CharData("&unknown;entity"),
+ EndElement{Name{"", "tag"}},
+ CharData("\n"),
+ StartElement{Name{"", "tag"}, []Attr{}},
+ CharData("{"),
+ EndElement{Name{"", "tag"}},
+ CharData("\n"),
+ StartElement{Name{"", "tag"}, []Attr{}},
+ CharData("&#zzz;"),
+ EndElement{Name{"", "tag"}},
+ CharData("\n"),
+}
+
+func TestNonStrictRawToken(t *testing.T) {
+ d := NewDecoder(strings.NewReader(nonStrictInput))
+ d.Strict = false
+ testRawToken(t, d, nonStrictTokens)
+}
+
type downCaser struct {
t *testing.T
r io.ByteReader
t.Fatalf("token %d: unexpected error: %s", i, err)
}
if !reflect.DeepEqual(have, want) {
- t.Errorf("token %d = %#v want %#v", i, have, want)
+ var shave, swant string
+ if _, ok := have.(CharData); ok {
+ shave = fmt.Sprintf("CharData(%q)", have)
+ } else {
+ shave = fmt.Sprintf("%#v", have)
+ }
+ if _, ok := want.(CharData); ok {
+ swant = fmt.Sprintf("CharData(%q)", want)
+ } else {
+ swant = fmt.Sprintf("%#v", want)
+ }
+ t.Errorf("token %d = %s, want %s", i, shave, swant)
}
}
}
{"\xef\xbf\xbe<doc/>", "illegal character code U+FFFE"},
{"<?xml version=\"1.0\"?><doc>\r\n<hiya/>\x07<toots/></doc>", "illegal character code U+0007"},
{"<?xml version=\"1.0\"?><doc \x12='value'>what's up</doc>", "expected attribute name in element"},
- {"<doc>&\x01;</doc>", "invalid character entity &;"},
- {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity &;"},
+ {"<doc>&\x01;</doc>", "invalid character entity & (no semicolon)"},
+ {"<doc>&\xef\xbf\xbe;</doc>", "invalid character entity & (no semicolon)"},
}
func TestDisallowedCharacters(t *testing.T) {