return CharData(data), nil;
}
- if b, ok = p.getc(); !ok {
+ if b, ok = p.mustgetc(); !ok {
return nil, p.err
}
switch b {
return nil, p.err;
}
p.space();
- if b, ok = p.getc(); !ok {
+ if b, ok = p.mustgetc(); !ok {
return nil, p.err
}
if b != '>' {
// the version is 1.0 and the encoding is UTF-8.
var target string;
if target, ok = p.name(); !ok {
- return nil, p.err
+ if p.err == nil {
+ p.err = SyntaxError("expected target name after <?")
+ }
+ return nil, p.err;
}
p.space();
p.buf.Reset();
var b0 byte;
for {
- if b, ok = p.getc(); !ok {
- if p.err == os.EOF {
- p.err = SyntaxError("unterminated <? directive")
- }
- return nil, p.err;
+ if b, ok = p.mustgetc(); !ok {
+ return nil, p.err
}
p.buf.WriteByte(b);
if b0 == '?' && b == '>' {
case '!':
// <!: Maybe comment, maybe CDATA.
- if b, ok = p.getc(); !ok {
+ if b, ok = p.mustgetc(); !ok {
return nil, p.err
}
switch b {
case '-': // <!-
// Probably <!-- for a comment.
- if b, ok = p.getc(); !ok {
+ if b, ok = p.mustgetc(); !ok {
return nil, p.err
}
if b != '-' {
p.buf.Reset();
var b0, b1 byte;
for {
- if b, ok = p.getc(); !ok {
- if p.err == os.EOF {
- p.err = SyntaxError("unterminated <!-- comment")
- }
- return nil, p.err;
+ if b, ok = p.mustgetc(); !ok {
+ return nil, p.err
}
p.buf.WriteByte(b);
if b0 == '-' && b1 == '-' && b == '>' {
case '[': // <![
// Probably <![CDATA[.
for i := 0; i < 6; i++ {
- if b, ok = p.getc(); !ok {
+ if b, ok = p.mustgetc(); !ok {
return nil, p.err
}
if b != "CDATA["[i] {
p.buf.Reset();
p.buf.WriteByte(b);
for {
- if b, ok = p.getc(); !ok {
+ if b, ok = p.mustgetc(); !ok {
return nil, p.err
}
if b == '>' {
attr = make([]Attr, 0, 4);
for {
p.space();
- if b, ok = p.getc(); !ok {
+ if b, ok = p.mustgetc(); !ok {
return nil, p.err
}
if b == '/' {
empty = true;
- if b, ok = p.getc(); !ok {
+ if b, ok = p.mustgetc(); !ok {
return nil, p.err
}
if b != '>' {
return nil, p.err;
}
p.space();
- if b, ok = p.getc(); !ok {
+ if b, ok = p.mustgetc(); !ok {
return nil, p.err
}
if b != '=' {
return nil, p.err;
}
p.space();
- if b, ok = p.getc(); !ok {
+ if b, ok = p.mustgetc(); !ok {
return nil, p.err
}
if b != '"' && b != '\'' {
return b, true;
}
+// mustgetc reads a single byte via p.getc.
+// If getc fails and p.err is os.EOF, p.err is replaced
+// with SyntaxError("unexpected EOF"); other errors are kept.
+// On any failure it returns ok==false.
+func (p *Parser) mustgetc() (b byte, ok bool) {
+ if b, ok = p.getc(); !ok {
+ if p.err == os.EOF { // report EOF as a syntax error; leave any other p.err untouched
+ p.err = SyntaxError("unexpected EOF")
+ }
+ }
+ return;
+}
+
// Unread a single byte.
func (p *Parser) ungetc(b byte) {
if b == '\n' {
p.buf.Reset();
Input:
for {
- b, ok := p.getc();
+ b, ok := p.mustgetc();
if !ok {
return nil
}
for i = 0; i < len(p.tmp); i++ {
p.tmp[i], p.err = p.r.ReadByte();
if p.err != nil {
- return nil
+ if p.err == os.EOF {
+ p.err = SyntaxError("unexpected EOF")
+ }
+ return nil;
}
c := p.tmp[i];
if c == ';' {
}
// Get name: /first(first|second)*/
-// Do not set p.err if the name is missing: let the caller provide better context.
+// Do not set p.err if the name is missing (unless unexpected EOF is received):
+// let the caller provide better context.
func (p *Parser) name() (s string, ok bool) {
var b byte;
- if b, ok = p.getc(); !ok {
+ if b, ok = p.mustgetc(); !ok {
return
}
// As a first approximation, we gather the bytes [A-Za-z_:.-\x80-\xFF]*
if b < utf8.RuneSelf && !isNameByte(b) {
p.ungetc(b);
- return;
+ return "", false;
}
p.buf.Reset();
p.buf.WriteByte(b);
for {
- if b, ok = p.getc(); !ok {
+ if b, ok = p.mustgetc(); !ok {
return
}
if b < utf8.RuneSelf && !isNameByte(b) {
Comment(strings.Bytes(" missing final newline ")),
}
+var xmlInput = []string{ // malformed inputs; TestSyntax requires a SyntaxError for each
+ // unexpected EOF cases
+ "<",
+ "<t",
+ "<t ",
+ "<t/",
+ "<t/>c",
+ "<!",
+ "<!-",
+ "<!--",
+ "<!--c-",
+ "<!--c--",
+ "<!d",
+ "<t></",
+ "<t></t",
+ "<?",
+ "<?p",
+ "<t a",
+ "<t a=",
+ "<t a='",
+ "<t a=''",
+ "<t/><![",
+ "<t/><![C",
+ "<t/><![CDATA[d",
+ "<t/><![CDATA[d]",
+ "<t/><![CDATA[d]]",
+
+ // other syntax errors
+ " ",
+ ">",
+ "<>",
+ "<t/a",
+ "<0 />",
+ "<?0 >",
+ // "<!0 >", // let the Token() caller handle
+ "</0>",
+ "<t 0=''>",
+ "<t a='&'>",
+ "<t a='<'>",
+ "<t> c;</t>",
+ "<t a>",
+ "<t a=>",
+ "<t a=v>",
+ // "<![CDATA[d]]>", // let the Token() caller handle
+ "cdata",
+ "<t></e>",
+ "<t></>",
+ "<t></t!",
+ "<t>cdata]]></t>",
+}
+
type stringReader struct {
s string;
off int;
}
}
}
+
+// TestSyntax feeds every entry of xmlInput to a fresh Parser and reads
+// tokens until Token returns an error. That terminating error must be a
+// SyntaxError; anything else is reported as a failure for that input.
+func TestSyntax(t *testing.T) {
+	for _, input := range xmlInput {
+		p := NewParser(StringReader(input));
+		var err os.Error;
+		// Drain tokens; only the error that ends the stream matters.
+		for _, err = p.Token(); err == nil; _, err = p.Token() {
+		}
+		if _, ok := err.(SyntaxError); !ok {
+			// Errorf rather than Fatalf so one bad input does not hide the
+			// rest, and the error actually returned is shown for debugging.
+			t.Errorf(`xmlInput "%s": expected SyntaxError, got %v`, input, err)
+		}
+	}
+}