]> Cypherpunks repositories - gostls13.git/commitdiff
exp/template/html: handle custom attrs and HTML5 embedded elements.
authorMike Samuel <mikesamuel@gmail.com>
Wed, 28 Sep 2011 21:07:48 +0000 (14:07 -0700)
committerMike Samuel <mikesamuel@gmail.com>
Wed, 28 Sep 2011 21:07:48 +0000 (14:07 -0700)
HTML5 allows embedded SVG and MathML.
Code searches show SVG is used for graphing.

This changes transition to deal with constructs like
   <svg xmlns:xlink="http://www.w3.org/1999/xlink">
It changes attr and clients to call a single function that combines
the name lookup and "on" prefix check to determine an attribute
value type given an attribute name.

That function uses heuristics to recognize that
     xlink:href and svg:href
have URL content, and that data-url is likely contains URL content,
since "javascript:" injection is such a problem.

I did a code search over a closure templates codebase to determine
patterns of custom attribute usage.  I did something like

$ find . -name \*.soy | \
    xargs egrep perl -ne 'while (s/\b((data-|\w+:)\w+)\s*=//) { print "$1\n"; }' | \
    sort | uniq

to produce the list at the bottom.

Filtering that by egrep -i 'src|url|uri' produces

data-docConsumptionUri
data-docIconUrl
data-launchUrl
data-lazySrc
data-pageUrl
data-shareurl
data-suggestServerUrl
data-tweetUrl
g:secondaryurls
g:url

which seem to match all the ones that are likely URL content.
There are some short words that match that heuristic, but I still think it decent since
any custom attribute that has a numeric or enumerated keyword value will be unaffected by
the URL assumption.
Counterexamples from /usr/share/dict:
during, hourly, maturity, nourish, purloin, security, surly

Custom attributes present in existing closure templates codebase:
buzz:aid
data-a
data-action
data-actor
data-allowEqualityOps
data-analyticsId
data-bid
data-c
data-cartId
data-categoryId
data-cid
data-command
data-count
data-country
data-creativeId
data-cssToken
data-dest
data-docAttribution
data-docConsumptionUri
data-docCurrencyCode
data-docIconUrl
data-docId
data-docPrice
data-docPriceMicros
data-docTitle
data-docType
data-docid
data-email
data-entityid
data-errorindex
data-f
data-feature
data-fgid
data-filter
data-fireEvent
data-followable
data-followed
data-hashChange
data-height
data-hover
data-href
data-id
data-index
data-invitable
data-isFree
data-isPurchased
data-jid
data-jumpid
data-launchUrl
data-lazySrc
data-listType
data-maxVisiblePages
data-name
data-nid
data-nodeid
data-numItems
data-numPerPage
data-offerType
data-oid
data-opUsesEquality
data-overflowclass
data-packageName
data-pageId
data-pageUrl
data-pos
data-priceBrief
data-profileIds
data-query
data-rating
data-ref
data-rentalGrantPeriodDays
data-rentalactivePeriodHours
data-reviewId
data-role
data-score
data-shareurl
data-showGeLe
data-showLineInclude
data-size
data-sortval
data-suggestServerType
data-suggestServerUrl
data-suggestionIndex
data-tabBarId
data-tabBarIndex
data-tags
data-target
data-textColor
data-theme
data-title
data-toggletarget
data-tooltip
data-trailerId
data-transactionId
data-transition
data-ts
data-tweetContent
data-tweetUrl
data-type
data-useAjax
data-value
data-width
data-x
dm:index
dm:type
g:aspects
g:decorateusingsecondary
g:em
g:entity
g:groups
g:id
g:istoplevel
g:li
g:numresults
g:oid
g:parentId
g:pl
g:pt
g:rating_override
g:secondaryurls
g:sortby
g:startindex
g:target
g:type
g:url
g:value
ga:barsize
ga:css
ga:expandAfterCharsExceed
ga:initialNumRows
ga:nocancelicon
ga:numRowsToExpandTo
ga:type
ga:unlockwhenrated
gw:address
gw:businessname
gw:comment
gw:phone
gw:source
ng:controller
xlink:href
xml:lang
xmlns:atom
xmlns:dc
xmlns:jstd
xmlns:ng
xmlns:og
xmlns:webstore
xmlns:xlink

R=nigeltao
CC=golang-dev
https://golang.org/cl/5119041

src/pkg/exp/template/html/attr.go
src/pkg/exp/template/html/escape_test.go
src/pkg/exp/template/html/html.go
src/pkg/exp/template/html/transition.go

index cc57f8bd8a77448321046507c88b58722325e9c4..6a36c7b7181104a5feccc8f1a5f9db2e9dd20530 100644 (file)
 
 package html
 
-// attrType[n] describes the value of the given attribute.
+import (
+       "strings"
+)
+
+// attrTypeMap[n] describes the value of the given attribute.
 // If an attribute affects (or can mask) the encoding or interpretation of
 // other content, or affects the contents, idempotency, or credentials of a
 // network message, then the value in this map is contentTypeUnsafe.
 // This map is derived from HTML5, specifically
-// http://www.w3.org/TR/html5/Overview.html#attributes-1 and
-// http://www.w3.org/TR/html5/Overview.html#event-handlers-on-elements-document-objects-and-window-objects
+// http://www.w3.org/TR/html5/Overview.html#attributes-1
 // as well as "%URI"-typed attributes from
 // http://www.w3.org/TR/html4/index/attributes.html
-var attrType = map[string]contentType{
-       "accept":             contentTypePlain,
-       "accept-charset":     contentTypeUnsafe,
-       "action":             contentTypeURL,
-       "alt":                contentTypePlain,
-       "archive":            contentTypeURL,
-       "async":              contentTypeUnsafe,
-       "autocomplete":       contentTypePlain,
-       "autofocus":          contentTypePlain,
-       "autoplay":           contentTypePlain,
-       "background":         contentTypeURL,
-       "border":             contentTypePlain,
-       "checked":            contentTypePlain,
-       "cite":               contentTypeURL,
-       "challenge":          contentTypeUnsafe,
-       "charset":            contentTypeUnsafe,
-       "class":              contentTypePlain,
-       "classid":            contentTypeURL,
-       "codebase":           contentTypeURL,
-       "cols":               contentTypePlain,
-       "colspan":            contentTypePlain,
-       "content":            contentTypeUnsafe,
-       "contenteditable":    contentTypePlain,
-       "contextmenu":        contentTypePlain,
-       "controls":           contentTypePlain,
-       "coords":             contentTypePlain,
-       "crossorigin":        contentTypeUnsafe,
-       "data":               contentTypeURL,
-       "datetime":           contentTypePlain,
-       "default":            contentTypePlain,
-       "defer":              contentTypeUnsafe,
-       "dir":                contentTypePlain,
-       "dirname":            contentTypePlain,
-       "disabled":           contentTypePlain,
-       "draggable":          contentTypePlain,
-       "dropzone":           contentTypePlain,
-       "enctype":            contentTypeUnsafe,
-       "for":                contentTypePlain,
-       "form":               contentTypeUnsafe,
-       "formaction":         contentTypeURL,
-       "formenctype":        contentTypeUnsafe,
-       "formmethod":         contentTypeUnsafe,
-       "formnovalidate":     contentTypeUnsafe,
-       "formtarget":         contentTypePlain,
-       "headers":            contentTypePlain,
-       "height":             contentTypePlain,
-       "hidden":             contentTypePlain,
-       "high":               contentTypePlain,
-       "href":               contentTypeURL,
-       "hreflang":           contentTypePlain,
-       "http-equiv":         contentTypeUnsafe,
-       "icon":               contentTypeURL,
-       "id":                 contentTypePlain,
-       "ismap":              contentTypePlain,
-       "keytype":            contentTypeUnsafe,
-       "kind":               contentTypePlain,
-       "label":              contentTypePlain,
-       "lang":               contentTypePlain,
-       "language":           contentTypeUnsafe,
-       "list":               contentTypePlain,
-       "longdesc":           contentTypeURL,
-       "loop":               contentTypePlain,
-       "low":                contentTypePlain,
-       "manifest":           contentTypeURL,
-       "max":                contentTypePlain,
-       "maxlength":          contentTypePlain,
-       "media":              contentTypePlain,
-       "mediagroup":         contentTypePlain,
-       "method":             contentTypeUnsafe,
-       "min":                contentTypePlain,
-       "multiple":           contentTypePlain,
-       "name":               contentTypePlain,
-       "novalidate":         contentTypeUnsafe,
-       "onabort":            contentTypeJS,
-       "onblur":             contentTypeJS,
-       "oncanplay":          contentTypeJS,
-       "oncanplaythrough":   contentTypeJS,
-       "onchange":           contentTypeJS,
-       "onclick":            contentTypeJS,
-       "oncontextmenu":      contentTypeJS,
-       "oncuechange":        contentTypeJS,
-       "ondblclick":         contentTypeJS,
-       "ondrag":             contentTypeJS,
-       "ondragend":          contentTypeJS,
-       "ondragenter":        contentTypeJS,
-       "ondragleave":        contentTypeJS,
-       "ondragover":         contentTypeJS,
-       "ondragstart":        contentTypeJS,
-       "ondrop":             contentTypeJS,
-       "ondurationchange":   contentTypeJS,
-       "onemptied":          contentTypeJS,
-       "onended":            contentTypeJS,
-       "onerror":            contentTypeJS,
-       "onfocus":            contentTypeJS,
-       "oninput":            contentTypeJS,
-       "oninvalid":          contentTypeJS,
-       "onkeydown":          contentTypeJS,
-       "onkeypress":         contentTypeJS,
-       "onkeyup":            contentTypeJS,
-       "onload":             contentTypeJS,
-       "onloadeddata":       contentTypeJS,
-       "onloadedmetadata":   contentTypeJS,
-       "onloadstart":        contentTypeJS,
-       "onmousedown":        contentTypeJS,
-       "onmousemove":        contentTypeJS,
-       "onmouseout":         contentTypeJS,
-       "onmouseover":        contentTypeJS,
-       "onmouseup":          contentTypeJS,
-       "onmousewheel":       contentTypeJS,
-       "onpause":            contentTypeJS,
-       "onplay":             contentTypeJS,
-       "onplaying":          contentTypeJS,
-       "onprogress":         contentTypeJS,
-       "onratechange":       contentTypeJS,
-       "onreadystatechange": contentTypeJS,
-       "onreset":            contentTypeJS,
-       "onscroll":           contentTypeJS,
-       "onseeked":           contentTypeJS,
-       "onseeking":          contentTypeJS,
-       "onselect":           contentTypeJS,
-       "onshow":             contentTypeJS,
-       "onstalled":          contentTypeJS,
-       "onsubmit":           contentTypeJS,
-       "onsuspend":          contentTypeJS,
-       "ontimeupdate":       contentTypeJS,
-       "onvolumechange":     contentTypeJS,
-       "onwaiting":          contentTypeJS,
-       "open":               contentTypePlain,
-       "optimum":            contentTypePlain,
-       "pattern":            contentTypeUnsafe,
-       "placeholder":        contentTypePlain,
-       "poster":             contentTypeURL,
-       "profile":            contentTypeURL,
-       "preload":            contentTypePlain,
-       "pubdate":            contentTypePlain,
-       "radiogroup":         contentTypePlain,
-       "readonly":           contentTypePlain,
-       "rel":                contentTypeUnsafe,
-       "required":           contentTypePlain,
-       "reversed":           contentTypePlain,
-       "rows":               contentTypePlain,
-       "rowspan":            contentTypePlain,
-       "sandbox":            contentTypeUnsafe,
-       "spellcheck":         contentTypePlain,
-       "scope":              contentTypePlain,
-       "scoped":             contentTypePlain,
-       "seamless":           contentTypePlain,
-       "selected":           contentTypePlain,
-       "shape":              contentTypePlain,
-       "size":               contentTypePlain,
-       "sizes":              contentTypePlain,
-       "span":               contentTypePlain,
-       "src":                contentTypeURL,
-       "srcdoc":             contentTypeHTML,
-       "srclang":            contentTypePlain,
-       "start":              contentTypePlain,
-       "step":               contentTypePlain,
-       "style":              contentTypeCSS,
-       "tabindex":           contentTypePlain,
-       "target":             contentTypePlain,
-       "title":              contentTypePlain,
-       "type":               contentTypeUnsafe,
-       "usemap":             contentTypeURL,
-       "value":              contentTypeUnsafe,
-       "width":              contentTypePlain,
-       "wrap":               contentTypePlain,
+var attrTypeMap = map[string]contentType{
+       "accept":          contentTypePlain,
+       "accept-charset":  contentTypeUnsafe,
+       "action":          contentTypeURL,
+       "alt":             contentTypePlain,
+       "archive":         contentTypeURL,
+       "async":           contentTypeUnsafe,
+       "autocomplete":    contentTypePlain,
+       "autofocus":       contentTypePlain,
+       "autoplay":        contentTypePlain,
+       "background":      contentTypeURL,
+       "border":          contentTypePlain,
+       "checked":         contentTypePlain,
+       "cite":            contentTypeURL,
+       "challenge":       contentTypeUnsafe,
+       "charset":         contentTypeUnsafe,
+       "class":           contentTypePlain,
+       "classid":         contentTypeURL,
+       "codebase":        contentTypeURL,
+       "cols":            contentTypePlain,
+       "colspan":         contentTypePlain,
+       "content":         contentTypeUnsafe,
+       "contenteditable": contentTypePlain,
+       "contextmenu":     contentTypePlain,
+       "controls":        contentTypePlain,
+       "coords":          contentTypePlain,
+       "crossorigin":     contentTypeUnsafe,
+       "data":            contentTypeURL,
+       "datetime":        contentTypePlain,
+       "default":         contentTypePlain,
+       "defer":           contentTypeUnsafe,
+       "dir":             contentTypePlain,
+       "dirname":         contentTypePlain,
+       "disabled":        contentTypePlain,
+       "draggable":       contentTypePlain,
+       "dropzone":        contentTypePlain,
+       "enctype":         contentTypeUnsafe,
+       "for":             contentTypePlain,
+       "form":            contentTypeUnsafe,
+       "formaction":      contentTypeURL,
+       "formenctype":     contentTypeUnsafe,
+       "formmethod":      contentTypeUnsafe,
+       "formnovalidate":  contentTypeUnsafe,
+       "formtarget":      contentTypePlain,
+       "headers":         contentTypePlain,
+       "height":          contentTypePlain,
+       "hidden":          contentTypePlain,
+       "high":            contentTypePlain,
+       "href":            contentTypeURL,
+       "hreflang":        contentTypePlain,
+       "http-equiv":      contentTypeUnsafe,
+       "icon":            contentTypeURL,
+       "id":              contentTypePlain,
+       "ismap":           contentTypePlain,
+       "keytype":         contentTypeUnsafe,
+       "kind":            contentTypePlain,
+       "label":           contentTypePlain,
+       "lang":            contentTypePlain,
+       "language":        contentTypeUnsafe,
+       "list":            contentTypePlain,
+       "longdesc":        contentTypeURL,
+       "loop":            contentTypePlain,
+       "low":             contentTypePlain,
+       "manifest":        contentTypeURL,
+       "max":             contentTypePlain,
+       "maxlength":       contentTypePlain,
+       "media":           contentTypePlain,
+       "mediagroup":      contentTypePlain,
+       "method":          contentTypeUnsafe,
+       "min":             contentTypePlain,
+       "multiple":        contentTypePlain,
+       "name":            contentTypePlain,
+       "novalidate":      contentTypeUnsafe,
+       // Skip handler names from
+       // http://www.w3.org/TR/html5/Overview.html#event-handlers-on-elements-document-objects-and-window-objects
+       // since we have special handling in attrType.
+       "open":        contentTypePlain,
+       "optimum":     contentTypePlain,
+       "pattern":     contentTypeUnsafe,
+       "placeholder": contentTypePlain,
+       "poster":      contentTypeURL,
+       "profile":     contentTypeURL,
+       "preload":     contentTypePlain,
+       "pubdate":     contentTypePlain,
+       "radiogroup":  contentTypePlain,
+       "readonly":    contentTypePlain,
+       "rel":         contentTypeUnsafe,
+       "required":    contentTypePlain,
+       "reversed":    contentTypePlain,
+       "rows":        contentTypePlain,
+       "rowspan":     contentTypePlain,
+       "sandbox":     contentTypeUnsafe,
+       "spellcheck":  contentTypePlain,
+       "scope":       contentTypePlain,
+       "scoped":      contentTypePlain,
+       "seamless":    contentTypePlain,
+       "selected":    contentTypePlain,
+       "shape":       contentTypePlain,
+       "size":        contentTypePlain,
+       "sizes":       contentTypePlain,
+       "span":        contentTypePlain,
+       "src":         contentTypeURL,
+       "srcdoc":      contentTypeHTML,
+       "srclang":     contentTypePlain,
+       "start":       contentTypePlain,
+       "step":        contentTypePlain,
+       "style":       contentTypeCSS,
+       "tabindex":    contentTypePlain,
+       "target":      contentTypePlain,
+       "title":       contentTypePlain,
+       "type":        contentTypeUnsafe,
+       "usemap":      contentTypeURL,
+       "value":       contentTypeUnsafe,
+       "width":       contentTypePlain,
+       "wrap":        contentTypePlain,
+       "xmlns":       contentTypeURL,
+}
+
+// attrType returns a conservative (upper-bound on authority) guess at the
+// type of the named attribute.
+func attrType(name string) contentType {
+       name = strings.ToLower(name)
+       if strings.HasPrefix(name, "data-") {
+               // Strip data- so that custom attribute heuristics below are
+               // widely applied.
+               // Treat data-action as URL below.
+               name = name[5:]
+       } else if colon := strings.IndexRune(name, ':'); colon != -1 {
+               if name[:colon] == "xmlns" {
+                       return contentTypeURL
+               }
+               // Treat svg:href and xlink:href as href below.
+               name = name[colon+1:]
+       }
+       if t, ok := attrTypeMap[name]; ok {
+               return t
+       }
+       // Treat partial event handler names as script.
+       if strings.HasPrefix(name, "on") {
+               return contentTypeJS
+       }
 
-       // TODO: data-* attrs?  Recognize data-foo-url and similar.
+       // Heuristics to prevent "javascript:..." injection in custom
+       // data attributes and custom attributes like g:tweetUrl.
+       // http://www.w3.org/TR/html5/elements.html#embedding-custom-non-visible-data-with-the-data-attributes:
+       // "Custom data attributes are intended to store custom data
+       //  private to the page or application, for which there are no
+       //  more appropriate attributes or elements."
+       // Developers seem to store URL content in data URLs that start
+       // or end with "URI" or "URL".
+       if strings.Contains(name, "src") ||
+               strings.Contains(name, "uri") ||
+               strings.Contains(name, "url") {
+               return contentTypeURL
+       }
+       return contentTypePlain
 }
index 0ca3c56619133a4d710093d51a4eb571d2ca988f..169cb762672a59e13f9a5156ad6028895c231cec 100644 (file)
@@ -1400,6 +1400,66 @@ func TestEscapeText(t *testing.T) {
                        `<style>value`,
                        context{state: stateCSS, element: elementStyle},
                },
+               {
+                       `<a xlink:href`,
+                       context{state: stateAttrName, attr: attrURL},
+               },
+               {
+                       `<a xmlns`,
+                       context{state: stateAttrName, attr: attrURL},
+               },
+               {
+                       `<a xmlns:foo`,
+                       context{state: stateAttrName, attr: attrURL},
+               },
+               {
+                       `<a xmlnsxyz`,
+                       context{state: stateAttrName},
+               },
+               {
+                       `<a data-url`,
+                       context{state: stateAttrName, attr: attrURL},
+               },
+               {
+                       `<a data-iconUri`,
+                       context{state: stateAttrName, attr: attrURL},
+               },
+               {
+                       `<a data-urlItem`,
+                       context{state: stateAttrName, attr: attrURL},
+               },
+               {
+                       `<a g:`,
+                       context{state: stateAttrName},
+               },
+               {
+                       `<a g:url`,
+                       context{state: stateAttrName, attr: attrURL},
+               },
+               {
+                       `<a g:iconUri`,
+                       context{state: stateAttrName, attr: attrURL},
+               },
+               {
+                       `<a g:urlItem`,
+                       context{state: stateAttrName, attr: attrURL},
+               },
+               {
+                       `<a g:value`,
+                       context{state: stateAttrName},
+               },
+               {
+                       `<a svg:style='`,
+                       context{state: stateCSS, delim: delimSingleQuote},
+               },
+               {
+                       `<svg:font-face`,
+                       context{state: stateTag},
+               },
+               {
+                       `<svg:a svg:onclick="`,
+                       context{state: stateJS, delim: delimDoubleQuote},
+               },
        }
 
        for _, test := range tests {
index 6ef66dd6c3ca74da17eff73eaaae6c16f2c6854d..91bb1b17049f3fa3482e1688e548d62a3cb6fd5e 100644 (file)
@@ -230,7 +230,7 @@ func htmlNameFilter(args ...interface{}) string {
                return filterFailsafe
        }
        s = strings.ToLower(s)
-       if t := attrType[s]; t != contentTypePlain && attrType["on"+s] != contentTypeJS {
+       if t := attrType(s); t != contentTypePlain {
                // TODO: Split attr and element name part filters so we can whitelist
                // attributes.
                return filterFailsafe
index d3c8a0529184bababe1edfd70299ad5fc5a3f0e5..49a14511745bd99599a07e9411c6cdc4d4fcbd8f 100644 (file)
@@ -106,18 +106,13 @@ func tTag(c context, s []byte) (context, int) {
                        err:   errorf(ErrBadHTML, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
                }, len(s)
        }
-       canonAttrName := strings.ToLower(string(s[i:j]))
-       switch attrType[canonAttrName] {
+       switch attrType(string(s[i:j])) {
        case contentTypeURL:
                attr = attrURL
        case contentTypeCSS:
                attr = attrStyle
        case contentTypeJS:
                attr = attrScript
-       default:
-               if strings.HasPrefix(canonAttrName, "on") {
-                       attr = attrScript
-               }
        }
        if j == len(s) {
                state = stateAttrName
@@ -512,16 +507,34 @@ var elementNameMap = map[string]element{
        "title":    elementTitle,
 }
 
+// asciiAlpha returns whether c is an ASCII letter.
+func asciiAlpha(c byte) bool {
+       return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
+}
+
+// asciiAlphaNum returns whether c is an ASCII letter or digit.
+func asciiAlphaNum(c byte) bool {
+       return asciiAlpha(c) || '0' <= c && c <= '9'
+}
+
 // eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
 func eatTagName(s []byte, i int) (int, element) {
-       j := i
-       for ; j < len(s); j++ {
+       if i == len(s) || !asciiAlpha(s[i]) {
+               return i, elementNone
+       }
+       j := i + 1
+       for j < len(s) {
                x := s[j]
-               if !(('a' <= x && x <= 'z') ||
-                       ('A' <= x && x <= 'Z') ||
-                       ('0' <= x && x <= '9' && i != j)) {
-                       break
+               if asciiAlphaNum(x) {
+                       j++
+                       continue
+               }
+               // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
+               if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
+                       j += 2
+                       continue
                }
+               break
        }
        return j, elementNameMap[strings.ToLower(string(s[i:j]))]
 }