From: Nigel Tao Date: Tue, 10 Jan 2012 00:06:09 +0000 (+1100) Subject: html: foreign element HTML integration points, tag name adjustment, X-Git-Tag: weekly.2012-01-15~112 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=748fab9d11e23b8f8c17cd583f995252ec86bfd3;p=gostls13.git html: foreign element HTML integration points, tag name adjustment, shorten the MathML namespace abbreviation from "mathml" to "math". Python's html5lib uses "mathml", but I think that that is an internal implementation detail; the test cases use "math". Pass tests10.dat, test 30:
a | | | |
| | | | | "a" R=andybalholm CC=golang-dev https://golang.org/cl/5529044 --- diff --git a/src/pkg/html/foreign.go b/src/pkg/html/foreign.go index 9a0520398c..3ba81ce4d6 100644 --- a/src/pkg/html/foreign.go +++ b/src/pkg/html/foreign.go @@ -23,6 +23,23 @@ func adjustForeignAttributes(aa []Attribute) { } } +func htmlIntegrationPoint(n *Node) bool { + if n.Type != ElementNode { + return false + } + switch n.Namespace { + case "math": + // TODO: annotation-xml elements whose start tags have "text/html" or + // "application/xhtml+xml" encodings. + case "svg": + switch n.Data { + case "desc", "foreignObject", "title": + return true + } + } + return false +} + // Section 12.2.5.5. var breakout = map[string]bool{ "b": true, @@ -72,4 +89,44 @@ var breakout = map[string]bool{ "var": true, } -// TODO: add look-up tables for MathML and SVG adjustments. +// Section 12.2.5.5. +var svgTagNameAdjustments = map[string]string{ + "altglyph": "altGlyph", + "altglyphdef": "altGlyphDef", + "altglyphitem": "altGlyphItem", + "animatecolor": "animateColor", + "animatemotion": "animateMotion", + "animatetransform": "animateTransform", + "clippath": "clipPath", + "feblend": "feBlend", + "fecolormatrix": "feColorMatrix", + "fecomponenttransfer": "feComponentTransfer", + "fecomposite": "feComposite", + "feconvolvematrix": "feConvolveMatrix", + "fediffuselighting": "feDiffuseLighting", + "fedisplacementmap": "feDisplacementMap", + "fedistantlight": "feDistantLight", + "feflood": "feFlood", + "fefunca": "feFuncA", + "fefuncb": "feFuncB", + "fefuncg": "feFuncG", + "fefuncr": "feFuncR", + "fegaussianblur": "feGaussianBlur", + "feimage": "feImage", + "femerge": "feMerge", + "femergenode": "feMergeNode", + "femorphology": "feMorphology", + "feoffset": "feOffset", + "fepointlight": "fePointLight", + "fespecularlighting": "feSpecularLighting", + "fespotlight": "feSpotLight", + "fetile": "feTile", + "feturbulence": "feTurbulence", + "foreignobject": "foreignObject", + "glyphref": "glyphRef", + "lineargradient": "linearGradient", + "radialgradient": "radialGradient", + "textpath": "textPath", +} + +// TODO: add look-up tables for MathML and SVG attribute adjustments. diff --git a/src/pkg/html/node.go b/src/pkg/html/node.go index 4ba3f5fb62..83f17308b1 100644 --- a/src/pkg/html/node.go +++ b/src/pkg/html/node.go @@ -26,6 +26,10 @@ var scopeMarker = Node{Type: scopeMarkerNode} // content for text) and are part of a tree of Nodes. Element nodes may also // have a Namespace and contain a slice of Attributes. Data is unescaped, so // that it looks like "a= 0; i-- { tag := p.oe[i].Data - for _, t := range matchTags { - if t == tag { - return i + if p.oe[i].Namespace == "" { + for _, t := range matchTags { + if t == tag { + return i + } + } + switch s { + case defaultScope: + // No-op. + case listItemScope: + if tag == "ol" || tag == "ul" { + return -1 + } + case buttonScope: + if tag == "button" { + return -1 + } + case tableScope: + if tag == "html" || tag == "table" { + return -1 + } + default: + panic("unreachable") } } - for _, t := range stopTags { - if t == tag { - return -1 + switch s { + case defaultScope, listItemScope, buttonScope: + for _, t := range defaultScopeStopTags[p.oe[i].Namespace] { + if t == tag { + return -1 + } } } } @@ -111,8 +140,30 @@ func (p *parser) indexOfElementInScope(stopTags []string, matchTags ...string) i // elementInScope is like popUntil, except that it doesn't modify the stack of // open elements. -func (p *parser) elementInScope(stopTags []string, matchTags ...string) bool { - return p.indexOfElementInScope(stopTags, matchTags...) != -1 +func (p *parser) elementInScope(s scope, matchTags ...string) bool { + return p.indexOfElementInScope(s, matchTags...) != -1 +} + +// clearStackToContext pops elements off the stack of open elements until a +// scope-defined element is found. +func (p *parser) clearStackToContext(s scope) { + for i := len(p.oe) - 1; i >= 0; i-- { + tag := p.oe[i].Data + switch s { + case tableScope: + if tag == "html" || tag == "table" { + p.oe = p.oe[:i+1] + return + } + case tableRowScope: + if tag == "html" || tag == "tr" { + p.oe = p.oe[:i+1] + return + } + default: + panic("unreachable") + } + } } // addChild adds a child node n to the top element, and pushes n onto the stack @@ -624,10 +675,10 @@ func inBodyIM(p *parser) bool { case "html": copyAttributes(p.oe[0], p.tok) case "address", "article", "aside", "blockquote", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "menu", "nav", "ol", "p", "section", "summary", "ul": - p.popUntil(buttonScopeStopTags, "p") + p.popUntil(buttonScope, "p") p.addElement(p.tok.Data, p.tok.Attr) case "h1", "h2", "h3", "h4", "h5", "h6": - p.popUntil(buttonScopeStopTags, "p") + p.popUntil(buttonScope, "p") switch n := p.top(); n.Data { case "h1", "h2", "h3", "h4", "h5", "h6": p.oe.pop() @@ -649,7 +700,7 @@ func inBodyIM(p *parser) bool { p.addFormattingElement(p.tok.Data, p.tok.Attr) case "nobr": p.reconstructActiveFormattingElements() - if p.elementInScope(defaultScopeStopTags, "nobr") { + if p.elementInScope(defaultScope, "nobr") { p.inBodyEndTagFormatting("nobr") p.reconstructActiveFormattingElements() } @@ -667,14 +718,14 @@ func inBodyIM(p *parser) bool { p.framesetOK = false case "table": if !p.quirks { - p.popUntil(buttonScopeStopTags, "p") + p.popUntil(buttonScope, "p") } p.addElement(p.tok.Data, p.tok.Attr) p.framesetOK = false p.im = inTableIM return true case "hr": - p.popUntil(buttonScopeStopTags, "p") + p.popUntil(buttonScope, "p") p.addElement(p.tok.Data, p.tok.Attr) p.oe.pop() p.acknowledgeSelfClosingTag() @@ -687,7 +738,7 @@ func inBodyIM(p *parser) bool { return true case "form": if p.form == nil { - p.popUntil(buttonScopeStopTags, "p") + p.popUntil(buttonScope, "p") p.addElement(p.tok.Data, p.tok.Attr) p.form = p.top() } @@ -697,7 +748,7 @@ func inBodyIM(p *parser) bool { node := p.oe[i] switch node.Data { case "li": - p.popUntil(listItemScopeStopTags, "li") + p.popUntil(listItemScope, "li") case "address", "div", "p": continue default: @@ -707,7 +758,7 @@ func inBodyIM(p *parser) bool { } break } - p.popUntil(buttonScopeStopTags, "p") + p.popUntil(buttonScope, "p") p.addElement(p.tok.Data, p.tok.Attr) case "dd", "dt": p.framesetOK = false @@ -725,13 +776,13 @@ func inBodyIM(p *parser) bool { } break } - p.popUntil(buttonScopeStopTags, "p") + p.popUntil(buttonScope, "p") p.addElement(p.tok.Data, p.tok.Attr) case "plaintext": - p.popUntil(buttonScopeStopTags, "p") + p.popUntil(buttonScope, "p") p.addElement(p.tok.Data, p.tok.Attr) case "button": - p.popUntil(defaultScopeStopTags, "button") + p.popUntil(defaultScope, "button") p.reconstructActiveFormattingElements() p.addElement(p.tok.Data, p.tok.Attr) p.framesetOK = false @@ -788,7 +839,7 @@ func inBodyIM(p *parser) bool { } } p.acknowledgeSelfClosingTag() - p.popUntil(buttonScopeStopTags, "p") + p.popUntil(buttonScope, "p") p.addElement("form", nil) p.form = p.top() if action != "" { @@ -806,23 +857,20 @@ func inBodyIM(p *parser) bool { p.oe.pop() p.form = nil case "xmp": - p.popUntil(buttonScopeStopTags, "p") + p.popUntil(buttonScope, "p") p.reconstructActiveFormattingElements() p.framesetOK = false p.addElement(p.tok.Data, p.tok.Attr) case "math", "svg": p.reconstructActiveFormattingElements() - namespace := "" if p.tok.Data == "math" { // TODO: adjust MathML attributes. - namespace = "mathml" } else { // TODO: adjust SVG attributes. - namespace = "svg" } adjustForeignAttributes(p.tok.Attr) p.addElement(p.tok.Data, p.tok.Attr) - p.top().Namespace = namespace + p.top().Namespace = p.tok.Data return true case "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr": // Ignore the token. @@ -837,16 +885,16 @@ func inBodyIM(p *parser) bool { p.im = afterBodyIM return true case "p": - if !p.elementInScope(buttonScopeStopTags, "p") { + if !p.elementInScope(buttonScope, "p") { p.addElement("p", nil) } - p.popUntil(buttonScopeStopTags, "p") + p.popUntil(buttonScope, "p") case "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u": p.inBodyEndTagFormatting(p.tok.Data) case "address", "article", "aside", "blockquote", "button", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "listing", "menu", "nav", "ol", "pre", "section", "summary", "ul": - p.popUntil(defaultScopeStopTags, p.tok.Data) + p.popUntil(defaultScope, p.tok.Data) case "applet", "marquee", "object": - if p.popUntil(defaultScopeStopTags, p.tok.Data) { + if p.popUntil(defaultScope, p.tok.Data) { p.clearActiveFormattingElements() } case "br": @@ -895,7 +943,7 @@ func (p *parser) inBodyEndTagFormatting(tag string) { p.afe.remove(formattingElement) return } - if !p.elementInScope(defaultScopeStopTags, tag) { + if !p.elementInScope(defaultScope, tag) { // Ignore the tag. return } @@ -1029,35 +1077,35 @@ func inTableIM(p *parser) bool { case StartTagToken: switch p.tok.Data { case "caption": - p.clearStackToContext(tableScopeStopTags) + p.clearStackToContext(tableScope) p.afe = append(p.afe, &scopeMarker) p.addElement(p.tok.Data, p.tok.Attr) p.im = inCaptionIM return true case "tbody", "tfoot", "thead": - p.clearStackToContext(tableScopeStopTags) + p.clearStackToContext(tableScope) p.addElement(p.tok.Data, p.tok.Attr) p.im = inTableBodyIM return true case "td", "th", "tr": - p.clearStackToContext(tableScopeStopTags) + p.clearStackToContext(tableScope) p.addElement("tbody", nil) p.im = inTableBodyIM return false case "table": - if p.popUntil(tableScopeStopTags, "table") { + if p.popUntil(tableScope, "table") { p.resetInsertionMode() return false } // Ignore the token. return true case "colgroup": - p.clearStackToContext(tableScopeStopTags) + p.clearStackToContext(tableScope) p.addElement(p.tok.Data, p.tok.Attr) p.im = inColumnGroupIM return true case "col": - p.clearStackToContext(tableScopeStopTags) + p.clearStackToContext(tableScope) p.addElement("colgroup", p.tok.Attr) p.im = inColumnGroupIM return false @@ -1078,7 +1126,7 @@ func inTableIM(p *parser) bool { case EndTagToken: switch p.tok.Data { case "table": - if p.popUntil(tableScopeStopTags, "table") { + if p.popUntil(tableScope, "table") { p.resetInsertionMode() return true } @@ -1105,26 +1153,13 @@ func inTableIM(p *parser) bool { return inBodyIM(p) } -// clearStackToContext pops elements off the stack of open elements -// until an element listed in stopTags is found. -func (p *parser) clearStackToContext(stopTags []string) { - for i := len(p.oe) - 1; i >= 0; i-- { - for _, tag := range stopTags { - if p.oe[i].Data == tag { - p.oe = p.oe[:i+1] - return - } - } - } -} - // Section 12.2.5.4.11. func inCaptionIM(p *parser) bool { switch p.tok.Type { case StartTagToken: switch p.tok.Data { case "caption", "col", "colgroup", "tbody", "td", "tfoot", "thead", "tr": - if p.popUntil(tableScopeStopTags, "caption") { + if p.popUntil(tableScope, "caption") { p.clearActiveFormattingElements() p.im = inTableIM return false @@ -1142,13 +1177,13 @@ func inCaptionIM(p *parser) bool { case EndTagToken: switch p.tok.Data { case "caption": - if p.popUntil(tableScopeStopTags, "caption") { + if p.popUntil(tableScope, "caption") { p.clearActiveFormattingElements() p.im = inTableIM } return true case "table": - if p.popUntil(tableScopeStopTags, "caption") { + if p.popUntil(tableScope, "caption") { p.clearActiveFormattingElements() p.im = inTableIM return false @@ -1232,7 +1267,7 @@ func inTableBodyIM(p *parser) bool { data = "tr" consumed = false case "caption", "col", "colgroup", "tbody", "tfoot", "thead": - if !p.popUntil(tableScopeStopTags, "tbody", "thead", "tfoot") { + if !p.popUntil(tableScope, "tbody", "thead", "tfoot") { // Ignore the token. return true } @@ -1244,7 +1279,7 @@ func inTableBodyIM(p *parser) bool { case EndTagToken: switch p.tok.Data { case "table": - if p.popUntil(tableScopeStopTags, "tbody", "thead", "tfoot") { + if p.popUntil(tableScope, "tbody", "thead", "tfoot") { p.im = inTableIM return false } @@ -1280,13 +1315,13 @@ func inRowIM(p *parser) bool { case StartTagToken: switch p.tok.Data { case "td", "th": - p.clearStackToContext(tableRowContextStopTags) + p.clearStackToContext(tableRowScope) p.addElement(p.tok.Data, p.tok.Attr) p.afe = append(p.afe, &scopeMarker) p.im = inCellIM return true case "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr": - if p.popUntil(tableScopeStopTags, "tr") { + if p.popUntil(tableScope, "tr") { p.im = inTableBodyIM return false } @@ -1298,14 +1333,14 @@ func inRowIM(p *parser) bool { case EndTagToken: switch p.tok.Data { case "tr": - if p.popUntil(tableScopeStopTags, "tr") { + if p.popUntil(tableScope, "tr") { p.im = inTableBodyIM return true } // Ignore the token. return true case "table": - if p.popUntil(tableScopeStopTags, "tr") { + if p.popUntil(tableScope, "tr") { p.im = inTableBodyIM return false } @@ -1350,7 +1385,7 @@ func inCellIM(p *parser) bool { case EndTagToken: switch p.tok.Data { case "td", "th": - if !p.popUntil(tableScopeStopTags, p.tok.Data) { + if !p.popUntil(tableScope, p.tok.Data) { // Ignore the token. return true } @@ -1371,7 +1406,7 @@ func inCellIM(p *parser) bool { return true } if closeTheCellAndReprocess { - if p.popUntil(tableScopeStopTags, "td") || p.popUntil(tableScopeStopTags, "th") { + if p.popUntil(tableScope, "td") || p.popUntil(tableScope, "th") { p.clearActiveFormattingElements() p.im = inRowIM return false @@ -1451,7 +1486,7 @@ func inSelectInTableIM(p *parser) bool { case StartTagToken, EndTagToken: switch p.tok.Data { case "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th": - if p.tok.Type == StartTagToken || p.elementInScope(tableScopeStopTags, p.tok.Data) { + if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.Data) { p.endSelect() return false } else { @@ -1672,6 +1707,11 @@ func parseForeignContent(p *parser) bool { Data: p.tok.Data, }) case StartTagToken: + if htmlIntegrationPoint(p.top()) { + inBodyIM(p) + p.resetInsertionMode() + return true + } if breakout[p.tok.Data] { for i := len(p.oe) - 1; i >= 0; i-- { // TODO: HTML, MathML integration points. @@ -1683,10 +1723,14 @@ func parseForeignContent(p *parser) bool { return false } switch p.top().Namespace { - case "mathml": + case "math": // TODO: adjust MathML attributes. case "svg": - // TODO: adjust SVG tag names. + // Adjust SVG tag names. The tokenizer lower-cases tag names, but + // SVG wants e.g. "foreignObject" with a capital second "O". + if x := svgTagNameAdjustments[p.tok.Data]; x != "" { + p.tok.Data = x + } // TODO: adjust SVG attributes. default: panic("html: bad parser state: unexpected namespace") diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go index 2b6a8b5083..91c8388b3a 100644 --- a/src/pkg/html/parse_test.go +++ b/src/pkg/html/parse_test.go @@ -184,7 +184,7 @@ func TestParser(t *testing.T) { {"tests4.dat", -1}, {"tests5.dat", -1}, {"tests6.dat", -1}, - {"tests10.dat", 30}, + {"tests10.dat", 31}, } for _, tf := range testFiles { f, err := os.Open("testdata/webkit/" + tf.filename)