]> Cypherpunks repositories - gostls13.git/commitdiff
xml: permit nested directives
authorChris Dollin <ehog.hedge@gmail.com>
Mon, 28 Feb 2011 19:09:04 +0000 (14:09 -0500)
committerRuss Cox <rsc@golang.org>
Mon, 28 Feb 2011 19:09:04 +0000 (14:09 -0500)
Return <!DOCTYPE ...> with nested directives as one big token.

Fixes #1549.

R=niemeyer, rsc
CC=golang-dev
https://golang.org/cl/4216050

src/pkg/xml/xml.go
src/pkg/xml/xml_test.go

index 417b4edfde0926cc8dc530621b2f3f64d1617765..691c13a1188a4f8c11f620bf46f9a512e265037d 100644 (file)
@@ -541,17 +541,36 @@ func (p *Parser) RawToken() (Token, os.Error) {
                }
 
                // Probably a directive: <!DOCTYPE ...>, <!ENTITY ...>, etc.
-               // We don't care, but accumulate for caller.
+               // We don't care, but accumulate for caller. Quoted angle
+               // brackets do not count for nesting.
                p.buf.Reset()
                p.buf.WriteByte(b)
+               inquote := uint8(0)
+               depth := 0
                for {
                        if b, ok = p.mustgetc(); !ok {
                                return nil, p.err
                        }
-                       if b == '>' {
+                       if inquote == 0 && b == '>' && depth == 0 {
                                break
                        }
                        p.buf.WriteByte(b)
+                       switch {
+                       case b == inquote:
+                               inquote = 0
+
+                       case inquote != 0:
+                               // in quotes, no special action
+
+                       case b == '\'' || b == '"':
+                               inquote = b
+
+                       case b == '>' && inquote == 0:
+                               depth--
+
+                       case b == '<' && inquote == 0:
+                               depth++
+                       }
                }
                return Directive(p.buf.Bytes()), nil
        }
index 317ecabd90de7705fc2cf6c684551a9ad33f79fa..887bc3d140fec48ff96fc109119e90ee3e0c653b 100644 (file)
@@ -185,6 +185,52 @@ func TestRawToken(t *testing.T) {
        }
 }
 
+// Ensure that directives (specifically !DOCTYPE) include the complete
+// text of any nested directives, noting that < and > do not change
+// nesting depth if they are in single or double quotes.
+
+var nestedDirectivesInput = `
+<!DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]>
+<!DOCTYPE [<!ENTITY xlt ">">]>
+<!DOCTYPE [<!ENTITY xlt "<">]>
+<!DOCTYPE [<!ENTITY xlt '>'>]>
+<!DOCTYPE [<!ENTITY xlt '<'>]>
+<!DOCTYPE [<!ENTITY xlt '">'>]>
+<!DOCTYPE [<!ENTITY xlt "'<">]>
+`
+
+var nestedDirectivesTokens = []Token{
+       CharData([]byte("\n")),
+       Directive([]byte(`DOCTYPE [<!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#">]`)),
+       CharData([]byte("\n")),
+       Directive([]byte(`DOCTYPE [<!ENTITY xlt ">">]`)),
+       CharData([]byte("\n")),
+       Directive([]byte(`DOCTYPE [<!ENTITY xlt "<">]`)),
+       CharData([]byte("\n")),
+       Directive([]byte(`DOCTYPE [<!ENTITY xlt '>'>]`)),
+       CharData([]byte("\n")),
+       Directive([]byte(`DOCTYPE [<!ENTITY xlt '<'>]`)),
+       CharData([]byte("\n")),
+       Directive([]byte(`DOCTYPE [<!ENTITY xlt '">'>]`)),
+       CharData([]byte("\n")),
+       Directive([]byte(`DOCTYPE [<!ENTITY xlt "'<">]`)),
+       CharData([]byte("\n")),
+}
+
+func TestNestedDirectives(t *testing.T) {
+       p := NewParser(StringReader(nestedDirectivesInput))
+
+       for i, want := range nestedDirectivesTokens {
+               have, err := p.Token()
+               if err != nil {
+                       t.Fatalf("token %d: unexpected error: %s", i, err)
+               }
+               if !reflect.DeepEqual(have, want) {
+                       t.Errorf("token %d = %#v want %#v", i, have, want)
+               }
+       }
+}
+
 func TestToken(t *testing.T) {
        p := NewParser(StringReader(testInput))