"fmt"
"html"
"os"
- "strings"
"template"
"template/parse"
)
}
return t
}
-
-// transitionFunc is the array of context transition functions for text nodes.
-// A transition function takes a context and template text input, and returns
-// the updated context and any unconsumed text.
-var transitionFunc = [...]func(context, []byte) (context, []byte){
- stateText: tText,
- stateTag: tTag,
- stateComment: tComment,
- stateRCDATA: tSpecialTagEnd,
- stateAttr: tAttr,
- stateURL: tURL,
- stateJS: tJS,
- stateJSDqStr: tJSStr,
- stateJSSqStr: tJSStr,
- stateJSRegexp: tJSRegexp,
- stateJSBlockCmt: tBlockCmt,
- stateJSLineCmt: tLineCmt,
- stateCSS: tCSS,
- stateCSSDqStr: tCSSStr,
- stateCSSSqStr: tCSSStr,
- stateCSSDqURL: tCSSStr,
- stateCSSSqURL: tCSSStr,
- stateCSSURL: tCSSStr,
- stateCSSBlockCmt: tBlockCmt,
- stateCSSLineCmt: tLineCmt,
- stateError: tError,
-}
-
-var commentStart = []byte("<!--")
-var commentEnd = []byte("-->")
-
-// tText is the context transition function for the text state.
-func tText(c context, s []byte) (context, []byte) {
- for {
- i := bytes.IndexByte(s, '<')
- if i == -1 || i+1 == len(s) {
- return c, nil
- } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
- return context{state: stateComment}, s[i+4:]
- }
- i++
- if s[i] == '/' {
- if i+1 == len(s) {
- return c, nil
- }
- i++
- }
- j, e := eatTagName(s, i)
- if j != i {
- // We've found an HTML tag.
- return context{state: stateTag, element: e}, s[j:]
- }
- s = s[j:]
- }
- panic("unreachable")
-}
-
-var elementContentType = [...]state{
- elementNone: stateText,
- elementScript: stateJS,
- elementStyle: stateCSS,
- elementTextarea: stateRCDATA,
- elementTitle: stateRCDATA,
-}
-
-// tTag is the context transition function for the tag state.
-func tTag(c context, s []byte) (context, []byte) {
- // Find the attribute name.
- attrStart := eatWhiteSpace(s, 0)
- i, err := eatAttrName(s, attrStart)
- if err != nil {
- return context{
- state: stateError,
- errStr: err.String(),
- }, nil
- }
- if i == len(s) {
- return c, nil
- }
- state := stateAttr
- canonAttrName := strings.ToLower(string(s[attrStart:i]))
- if urlAttr[canonAttrName] {
- state = stateURL
- } else if strings.HasPrefix(canonAttrName, "on") {
- state = stateJS
- } else if canonAttrName == "style" {
- state = stateCSS
- }
-
- // Look for the start of the value.
- i = eatWhiteSpace(s, i)
- if i == len(s) {
- return c, s[i:]
- }
- if s[i] == '>' {
- state = elementContentType[c.element]
- return context{state: state, element: c.element}, s[i+1:]
- } else if s[i] != '=' {
- // Possible due to a valueless attribute or '/' in "<input />".
- return c, s[i:]
- }
- // Consume the "=".
- i = eatWhiteSpace(s, i+1)
-
- // Find the attribute delimiter.
- delim := delimSpaceOrTagEnd
- if i < len(s) {
- switch s[i] {
- case '\'':
- delim, i = delimSingleQuote, i+1
- case '"':
- delim, i = delimDoubleQuote, i+1
- }
- }
-
- return context{state: state, delim: delim, element: c.element}, s[i:]
-}
-
-// tComment is the context transition function for stateComment.
-func tComment(c context, s []byte) (context, []byte) {
- i := bytes.Index(s, commentEnd)
- if i != -1 {
- return context{}, s[i+3:]
- }
- return c, nil
-}
-
-// specialTagEndMarkers maps element types to the character sequence that
-// case-insensitively signals the end of the special tag body.
-var specialTagEndMarkers = [...]string{
- elementScript: "</script",
- elementStyle: "</style",
- elementTextarea: "</textarea",
- elementTitle: "</title",
-}
-
-// tSpecialTagEnd is the context transition function for raw text and RCDATA
-// element states.
-func tSpecialTagEnd(c context, s []byte) (context, []byte) {
- if c.element != elementNone {
- end := specialTagEndMarkers[c.element]
- i := strings.Index(strings.ToLower(string(s)), end)
- if i != -1 {
- return context{state: stateTag}, s[i+len(end):]
- }
- }
- return c, nil
-}
-
-// tAttr is the context transition function for the attribute state.
-func tAttr(c context, s []byte) (context, []byte) {
- return c, nil
-}
-
-// tURL is the context transition function for the URL state.
-func tURL(c context, s []byte) (context, []byte) {
- if bytes.IndexAny(s, "#?") >= 0 {
- c.urlPart = urlPartQueryOrFrag
- } else if len(s) != 0 && c.urlPart == urlPartNone {
- c.urlPart = urlPartPreQuery
- }
- return c, nil
-}
-
-// tJS is the context transition function for the JS state.
-func tJS(c context, s []byte) (context, []byte) {
- if d, t := tSpecialTagEnd(c, s); t != nil {
- return d, t
- }
-
- i := bytes.IndexAny(s, `"'/`)
- if i == -1 {
- // Entire input is non string, comment, regexp tokens.
- c.jsCtx = nextJSCtx(s, c.jsCtx)
- return c, nil
- }
- c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
- switch s[i] {
- case '"':
- c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
- case '\'':
- c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
- case '/':
- switch {
- case i+1 < len(s) && s[i+1] == '/':
- c.state, i = stateJSLineCmt, i+1
- case i+1 < len(s) && s[i+1] == '*':
- c.state, i = stateJSBlockCmt, i+1
- case c.jsCtx == jsCtxRegexp:
- c.state = stateJSRegexp
- case c.jsCtx == jsCtxDivOp:
- c.jsCtx = jsCtxRegexp
- default:
- return context{
- state: stateError,
- errStr: fmt.Sprintf("'/' could start div or regexp: %.32q", s[i:]),
- }, nil
- }
- default:
- panic("unreachable")
- }
- return c, s[i+1:]
-}
-
-// tJSStr is the context transition function for the JS string states.
-func tJSStr(c context, s []byte) (context, []byte) {
- if d, t := tSpecialTagEnd(c, s); t != nil {
- return d, t
- }
-
- quoteAndEsc := `\"`
- if c.state == stateJSSqStr {
- quoteAndEsc = `\'`
- }
-
- b := s
- for {
- i := bytes.IndexAny(b, quoteAndEsc)
- if i == -1 {
- return c, nil
- }
- if b[i] == '\\' {
- i++
- if i == len(b) {
- return context{
- state: stateError,
- errStr: fmt.Sprintf("unfinished escape sequence in JS string: %q", s),
- }, nil
- }
- } else {
- c.state, c.jsCtx = stateJS, jsCtxDivOp
- return c, b[i+1:]
- }
- b = b[i+1:]
- }
- panic("unreachable")
-}
-
-// tJSRegexp is the context transition function for the /RegExp/ literal state.
-func tJSRegexp(c context, s []byte) (context, []byte) {
- if d, t := tSpecialTagEnd(c, s); t != nil {
- return d, t
- }
-
- b := s
- inCharset := false
- for {
- i := bytes.IndexAny(b, `/[\]`)
- if i == -1 {
- break
- }
- switch b[i] {
- case '/':
- if !inCharset {
- c.state, c.jsCtx = stateJS, jsCtxDivOp
- return c, b[i+1:]
- }
- case '\\':
- i++
- if i == len(b) {
- return context{
- state: stateError,
- errStr: fmt.Sprintf("unfinished escape sequence in JS regexp: %q", s),
- }, nil
- }
- case '[':
- inCharset = true
- case ']':
- inCharset = false
- default:
- panic("unreachable")
- }
- b = b[i+1:]
- }
-
- if inCharset {
- // This can be fixed by making context richer if interpolation
- // into charsets is desired.
- return context{
- state: stateError,
- errStr: fmt.Sprintf("unfinished JS regexp charset: %q", s),
- }, nil
- }
-
- return c, nil
-}
-
-var blockCommentEnd = []byte("*/")
-
-// tBlockCmt is the context transition function for /*comment*/ states.
-func tBlockCmt(c context, s []byte) (context, []byte) {
- if d, t := tSpecialTagEnd(c, s); t != nil {
- return d, t
- }
- i := bytes.Index(s, blockCommentEnd)
- if i == -1 {
- return c, nil
- }
- switch c.state {
- case stateJSBlockCmt:
- c.state = stateJS
- case stateCSSBlockCmt:
- c.state = stateCSS
- default:
- panic(c.state.String())
- }
- return c, s[i+2:]
-}
-
-// tLineCmt is the context transition function for //comment states.
-func tLineCmt(c context, s []byte) (context, []byte) {
- if d, t := tSpecialTagEnd(c, s); t != nil {
- return d, t
- }
- var lineTerminators string
- var endState state
- switch c.state {
- case stateJSLineCmt:
- lineTerminators, endState = "\n\r\u2028\u2029", stateJS
- case stateCSSLineCmt:
- lineTerminators, endState = "\n\f\r", stateCSS
- // Line comments are not part of any published CSS standard but
- // are supported by the 4 major browsers.
- // This defines line comments as
- // LINECOMMENT ::= "//" [^\n\f\d]*
- // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
- // newlines:
- // nl ::= #xA | #xD #xA | #xD | #xC
- default:
- panic(c.state.String())
- }
-
- i := bytes.IndexAny(s, lineTerminators)
- if i == -1 {
- return c, nil
- }
- c.state = endState
- // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
- // "However, the LineTerminator at the end of the line is not
- // considered to be part of the single-line comment; it is recognised
- // separately by the lexical grammar and becomes part of the stream of
- // input elements for the syntactic grammar."
- return c, s[i:]
-}
-
-// tCSS is the context transition function for the CSS state.
-func tCSS(c context, s []byte) (context, []byte) {
- if d, t := tSpecialTagEnd(c, s); t != nil {
- return d, t
- }
-
- // CSS quoted strings are almost never used except for:
- // (1) URLs as in background: "/foo.png"
- // (2) Multiword font-names as in font-family: "Times New Roman"
- // (3) List separators in content values as in inline-lists:
- // <style>
- // ul.inlineList { list-style: none; padding:0 }
- // ul.inlineList > li { display: inline }
- // ul.inlineList > li:before { content: ", " }
- // ul.inlineList > li:first-child:before { content: "" }
- // </style>
- // <ul class=inlineList><li>One<li>Two<li>Three</ul>
- // (4) Attribute value selectors as in a[href="http://example.com/"]
- //
- // We conservatively treat all strings as URLs, but make some
- // allowances to avoid confusion.
- //
- // In (1), our conservative assumption is justified.
- // In (2), valid font names do not contain ':', '?', or '#', so our
- // conservative assumption is fine since we will never transition past
- // urlPartPreQuery.
- // In (3), our protocol heuristic should not be tripped, and there
- // should not be non-space content after a '?' or '#', so as long as
- // we only %-encode RFC 3986 reserved characters we are ok.
- // In (4), we should URL escape for URL attributes, and for others we
- // have the attribute name available if our conservative assumption
- // proves problematic for real code.
-
- for {
- i := bytes.IndexAny(s, `("'/`)
- if i == -1 {
- return c, nil
- }
- switch s[i] {
- case '(':
- // Look for url to the left.
- p := bytes.TrimRight(s[:i], "\t\n\f\r ")
- if endsWithCSSKeyword(p, "url") {
- q := bytes.TrimLeft(s[i+1:], "\t\n\f\r ")
- switch {
- case len(q) != 0 && q[0] == '"':
- c.state, s = stateCSSDqURL, q[1:]
- case len(q) != 0 && q[0] == '\'':
- c.state, s = stateCSSSqURL, q[1:]
-
- default:
- c.state, s = stateCSSURL, q
- }
- return c, s
- }
- case '/':
- if i+1 < len(s) {
- switch s[i+1] {
- case '/':
- c.state = stateCSSLineCmt
- return c, s[i+2:]
- case '*':
- c.state = stateCSSBlockCmt
- return c, s[i+2:]
- }
- }
- case '"':
- c.state = stateCSSDqStr
- return c, s[i+1:]
- case '\'':
- c.state = stateCSSSqStr
- return c, s[i+1:]
- }
- s = s[i+1:]
- }
- panic("unreachable")
-}
-
-// tCSSStr is the context transition function for the CSS string and URL states.
-func tCSSStr(c context, s []byte) (context, []byte) {
- if d, t := tSpecialTagEnd(c, s); t != nil {
- return d, t
- }
-
- var endAndEsc string
- switch c.state {
- case stateCSSDqStr, stateCSSDqURL:
- endAndEsc = `\"`
- case stateCSSSqStr, stateCSSSqURL:
- endAndEsc = `\'`
- case stateCSSURL:
- // Unquoted URLs end with a newline or close parenthesis.
- // The below includes the wc (whitespace character) and nl.
- endAndEsc = "\\\t\n\f\r )"
- default:
- panic(c.state.String())
- }
-
- b := s
- for {
- i := bytes.IndexAny(b, endAndEsc)
- if i == -1 {
- return tURL(c, decodeCSS(b))
- }
- if b[i] == '\\' {
- i++
- if i == len(b) {
- return context{
- state: stateError,
- errStr: fmt.Sprintf("unfinished escape sequence in CSS string: %q", s),
- }, nil
- }
- } else {
- c.state = stateCSS
- return c, b[i+1:]
- }
- c, _ = tURL(c, decodeCSS(b[:i+1]))
- b = b[i+1:]
- }
- panic("unreachable")
-}
-
-// tError is the context transition function for the error state.
-func tError(c context, s []byte) (context, []byte) {
- return c, nil
-}
-
-// eatAttrName returns the largest j such that s[i:j] is an attribute name.
-// It returns an error if s[i:] does not look like it begins with an
-// attribute name, such as encountering a quote mark without a preceding
-// equals sign.
-func eatAttrName(s []byte, i int) (int, os.Error) {
- for j := i; j < len(s); j++ {
- switch s[j] {
- case ' ', '\t', '\n', '\f', '\r', '=', '>':
- return j, nil
- case '\'', '"', '<':
- // These result in a parse warning in HTML5 and are
- // indicative of serious problems if seen in an attr
- // name in a template.
- return 0, fmt.Errorf("%q in attribute name: %.32q", s[j:j+1], s)
- default:
- // No-op.
- }
- }
- return len(s), nil
-}
-
-var elementNameMap = map[string]element{
- "script": elementScript,
- "style": elementStyle,
- "textarea": elementTextarea,
- "title": elementTitle,
-}
-
-// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
-func eatTagName(s []byte, i int) (int, element) {
- j := i
- for ; j < len(s); j++ {
- x := s[j]
- if !(('a' <= x && x <= 'z') ||
- ('A' <= x && x <= 'Z') ||
- ('0' <= x && x <= '9' && i != j)) {
- break
- }
- }
- return j, elementNameMap[strings.ToLower(string(s[i:j]))]
-}
-
-// eatWhiteSpace returns the largest j such that s[i:j] is white space.
-func eatWhiteSpace(s []byte, i int) int {
- for j := i; j < len(s); j++ {
- switch s[j] {
- case ' ', '\t', '\n', '\f', '\r':
- // No-op.
- default:
- return j
- }
- }
- return len(s)
-}
-
-// urlAttr is the set of attribute names whose values are URLs.
-// It consists of all "%URI"-typed attributes from
-// http://www.w3.org/TR/html4/index/attributes.html
-// as well as those attributes defined at
-// http://dev.w3.org/html5/spec/index.html#attributes-1
-// whose Value column in that table matches
-// "Valid [non-empty] URL potentially surrounded by spaces".
-var urlAttr = map[string]bool{
- "action": true,
- "archive": true,
- "background": true,
- "cite": true,
- "classid": true,
- "codebase": true,
- "data": true,
- "formaction": true,
- "href": true,
- "icon": true,
- "longdesc": true,
- "manifest": true,
- "poster": true,
- "profile": true,
- "src": true,
- "usemap": true,
-}
--- /dev/null
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "strings"
+)
+
+// transitionFunc is the array of context transition functions for text nodes.
+// A transition function takes a context and template text input, and returns
+// the updated context and any unconsumed text.
+var transitionFunc = [...]func(context, []byte) (context, []byte){
+ stateText: tText,
+ stateTag: tTag,
+ stateComment: tComment,
+ stateRCDATA: tSpecialTagEnd,
+ stateAttr: tAttr,
+ stateURL: tURL,
+ stateJS: tJS,
+ stateJSDqStr: tJSStr,
+ stateJSSqStr: tJSStr,
+ stateJSRegexp: tJSRegexp,
+ stateJSBlockCmt: tBlockCmt,
+ stateJSLineCmt: tLineCmt,
+ stateCSS: tCSS,
+ stateCSSDqStr: tCSSStr,
+ stateCSSSqStr: tCSSStr,
+ stateCSSDqURL: tCSSStr,
+ stateCSSSqURL: tCSSStr,
+ stateCSSURL: tCSSStr,
+ stateCSSBlockCmt: tBlockCmt,
+ stateCSSLineCmt: tLineCmt,
+ stateError: tError,
+}
+
+var commentStart = []byte("<!--")
+var commentEnd = []byte("-->")
+
+// tText is the context transition function for the text state.
+func tText(c context, s []byte) (context, []byte) {
+ for {
+ i := bytes.IndexByte(s, '<')
+ if i == -1 || i+1 == len(s) {
+ return c, nil
+ } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
+ return context{state: stateComment}, s[i+4:]
+ }
+ i++
+ if s[i] == '/' {
+ if i+1 == len(s) {
+ return c, nil
+ }
+ i++
+ }
+ j, e := eatTagName(s, i)
+ if j != i {
+ // We've found an HTML tag.
+ return context{state: stateTag, element: e}, s[j:]
+ }
+ s = s[j:]
+ }
+ panic("unreachable")
+}
+
+var elementContentType = [...]state{
+ elementNone: stateText,
+ elementScript: stateJS,
+ elementStyle: stateCSS,
+ elementTextarea: stateRCDATA,
+ elementTitle: stateRCDATA,
+}
+
+// tTag is the context transition function for the tag state.
+func tTag(c context, s []byte) (context, []byte) {
+ // Find the attribute name.
+ attrStart := eatWhiteSpace(s, 0)
+ i, err := eatAttrName(s, attrStart)
+ if err != nil {
+ return context{
+ state: stateError,
+ errStr: err.String(),
+ }, nil
+ }
+ if i == len(s) {
+ return c, nil
+ }
+ state := stateAttr
+ canonAttrName := strings.ToLower(string(s[attrStart:i]))
+ if urlAttr[canonAttrName] {
+ state = stateURL
+ } else if strings.HasPrefix(canonAttrName, "on") {
+ state = stateJS
+ } else if canonAttrName == "style" {
+ state = stateCSS
+ }
+
+ // Look for the start of the value.
+ i = eatWhiteSpace(s, i)
+ if i == len(s) {
+ return c, s[i:]
+ }
+ if s[i] == '>' {
+ state = elementContentType[c.element]
+ return context{state: state, element: c.element}, s[i+1:]
+ } else if s[i] != '=' {
+ // Possible due to a valueless attribute or '/' in "<input />".
+ return c, s[i:]
+ }
+ // Consume the "=".
+ i = eatWhiteSpace(s, i+1)
+
+ // Find the attribute delimiter.
+ delim := delimSpaceOrTagEnd
+ if i < len(s) {
+ switch s[i] {
+ case '\'':
+ delim, i = delimSingleQuote, i+1
+ case '"':
+ delim, i = delimDoubleQuote, i+1
+ }
+ }
+
+ return context{state: state, delim: delim, element: c.element}, s[i:]
+}
+
+// tComment is the context transition function for stateComment.
+func tComment(c context, s []byte) (context, []byte) {
+ i := bytes.Index(s, commentEnd)
+ if i != -1 {
+ return context{}, s[i+3:]
+ }
+ return c, nil
+}
+
+// specialTagEndMarkers maps element types to the character sequence that
+// case-insensitively signals the end of the special tag body.
+var specialTagEndMarkers = [...]string{
+ elementScript: "</script",
+ elementStyle: "</style",
+ elementTextarea: "</textarea",
+ elementTitle: "</title",
+}
+
+// tSpecialTagEnd is the context transition function for raw text and RCDATA
+// element states.
+func tSpecialTagEnd(c context, s []byte) (context, []byte) {
+ if c.element != elementNone {
+ end := specialTagEndMarkers[c.element]
+ i := strings.Index(strings.ToLower(string(s)), end)
+ if i != -1 {
+ return context{state: stateTag}, s[i+len(end):]
+ }
+ }
+ return c, nil
+}
+
+// tAttr is the context transition function for the attribute state.
+func tAttr(c context, s []byte) (context, []byte) {
+ return c, nil
+}
+
+// tURL is the context transition function for the URL state.
+func tURL(c context, s []byte) (context, []byte) {
+ if bytes.IndexAny(s, "#?") >= 0 {
+ c.urlPart = urlPartQueryOrFrag
+ } else if len(s) != 0 && c.urlPart == urlPartNone {
+ c.urlPart = urlPartPreQuery
+ }
+ return c, nil
+}
+
+// tJS is the context transition function for the JS state.
+func tJS(c context, s []byte) (context, []byte) {
+ if d, t := tSpecialTagEnd(c, s); t != nil {
+ return d, t
+ }
+
+ i := bytes.IndexAny(s, `"'/`)
+ if i == -1 {
+ // Entire input is non string, comment, regexp tokens.
+ c.jsCtx = nextJSCtx(s, c.jsCtx)
+ return c, nil
+ }
+ c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
+ switch s[i] {
+ case '"':
+ c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
+ case '\'':
+ c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
+ case '/':
+ switch {
+ case i+1 < len(s) && s[i+1] == '/':
+ c.state, i = stateJSLineCmt, i+1
+ case i+1 < len(s) && s[i+1] == '*':
+ c.state, i = stateJSBlockCmt, i+1
+ case c.jsCtx == jsCtxRegexp:
+ c.state = stateJSRegexp
+ case c.jsCtx == jsCtxDivOp:
+ c.jsCtx = jsCtxRegexp
+ default:
+ return context{
+ state: stateError,
+ errStr: fmt.Sprintf("'/' could start div or regexp: %.32q", s[i:]),
+ }, nil
+ }
+ default:
+ panic("unreachable")
+ }
+ return c, s[i+1:]
+}
+
+// tJSStr is the context transition function for the JS string states.
+func tJSStr(c context, s []byte) (context, []byte) {
+ if d, t := tSpecialTagEnd(c, s); t != nil {
+ return d, t
+ }
+
+ quoteAndEsc := `\"`
+ if c.state == stateJSSqStr {
+ quoteAndEsc = `\'`
+ }
+
+ b := s
+ for {
+ i := bytes.IndexAny(b, quoteAndEsc)
+ if i == -1 {
+ return c, nil
+ }
+ if b[i] == '\\' {
+ i++
+ if i == len(b) {
+ return context{
+ state: stateError,
+ errStr: fmt.Sprintf("unfinished escape sequence in JS string: %q", s),
+ }, nil
+ }
+ } else {
+ c.state, c.jsCtx = stateJS, jsCtxDivOp
+ return c, b[i+1:]
+ }
+ b = b[i+1:]
+ }
+ panic("unreachable")
+}
+
+// tJSRegexp is the context transition function for the /RegExp/ literal state.
+func tJSRegexp(c context, s []byte) (context, []byte) {
+ if d, t := tSpecialTagEnd(c, s); t != nil {
+ return d, t
+ }
+
+ b := s
+ inCharset := false
+ for {
+ i := bytes.IndexAny(b, `/[\]`)
+ if i == -1 {
+ break
+ }
+ switch b[i] {
+ case '/':
+ if !inCharset {
+ c.state, c.jsCtx = stateJS, jsCtxDivOp
+ return c, b[i+1:]
+ }
+ case '\\':
+ i++
+ if i == len(b) {
+ return context{
+ state: stateError,
+ errStr: fmt.Sprintf("unfinished escape sequence in JS regexp: %q", s),
+ }, nil
+ }
+ case '[':
+ inCharset = true
+ case ']':
+ inCharset = false
+ default:
+ panic("unreachable")
+ }
+ b = b[i+1:]
+ }
+
+ if inCharset {
+ // This can be fixed by making context richer if interpolation
+ // into charsets is desired.
+ return context{
+ state: stateError,
+ errStr: fmt.Sprintf("unfinished JS regexp charset: %q", s),
+ }, nil
+ }
+
+ return c, nil
+}
+
+var blockCommentEnd = []byte("*/")
+
+// tBlockCmt is the context transition function for /*comment*/ states.
+func tBlockCmt(c context, s []byte) (context, []byte) {
+ if d, t := tSpecialTagEnd(c, s); t != nil {
+ return d, t
+ }
+ i := bytes.Index(s, blockCommentEnd)
+ if i == -1 {
+ return c, nil
+ }
+ switch c.state {
+ case stateJSBlockCmt:
+ c.state = stateJS
+ case stateCSSBlockCmt:
+ c.state = stateCSS
+ default:
+ panic(c.state.String())
+ }
+ return c, s[i+2:]
+}
+
+// tLineCmt is the context transition function for //comment states.
+func tLineCmt(c context, s []byte) (context, []byte) {
+ if d, t := tSpecialTagEnd(c, s); t != nil {
+ return d, t
+ }
+ var lineTerminators string
+ var endState state
+ switch c.state {
+ case stateJSLineCmt:
+ lineTerminators, endState = "\n\r\u2028\u2029", stateJS
+ case stateCSSLineCmt:
+ lineTerminators, endState = "\n\f\r", stateCSS
+ // Line comments are not part of any published CSS standard but
+ // are supported by the 4 major browsers.
+ // This defines line comments as
+ // LINECOMMENT ::= "//" [^\n\f\d]*
+ // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
+ // newlines:
+ // nl ::= #xA | #xD #xA | #xD | #xC
+ default:
+ panic(c.state.String())
+ }
+
+ i := bytes.IndexAny(s, lineTerminators)
+ if i == -1 {
+ return c, nil
+ }
+ c.state = endState
+ // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
+ // "However, the LineTerminator at the end of the line is not
+ // considered to be part of the single-line comment; it is recognised
+ // separately by the lexical grammar and becomes part of the stream of
+ // input elements for the syntactic grammar."
+ return c, s[i:]
+}
+
+// tCSS is the context transition function for the CSS state.
+func tCSS(c context, s []byte) (context, []byte) {
+ if d, t := tSpecialTagEnd(c, s); t != nil {
+ return d, t
+ }
+
+ // CSS quoted strings are almost never used except for:
+ // (1) URLs as in background: "/foo.png"
+ // (2) Multiword font-names as in font-family: "Times New Roman"
+ // (3) List separators in content values as in inline-lists:
+ // <style>
+ // ul.inlineList { list-style: none; padding:0 }
+ // ul.inlineList > li { display: inline }
+ // ul.inlineList > li:before { content: ", " }
+ // ul.inlineList > li:first-child:before { content: "" }
+ // </style>
+ // <ul class=inlineList><li>One<li>Two<li>Three</ul>
+ // (4) Attribute value selectors as in a[href="http://example.com/"]
+ //
+ // We conservatively treat all strings as URLs, but make some
+ // allowances to avoid confusion.
+ //
+ // In (1), our conservative assumption is justified.
+ // In (2), valid font names do not contain ':', '?', or '#', so our
+ // conservative assumption is fine since we will never transition past
+ // urlPartPreQuery.
+ // In (3), our protocol heuristic should not be tripped, and there
+ // should not be non-space content after a '?' or '#', so as long as
+ // we only %-encode RFC 3986 reserved characters we are ok.
+ // In (4), we should URL escape for URL attributes, and for others we
+ // have the attribute name available if our conservative assumption
+ // proves problematic for real code.
+
+ for {
+ i := bytes.IndexAny(s, `("'/`)
+ if i == -1 {
+ return c, nil
+ }
+ switch s[i] {
+ case '(':
+ // Look for url to the left.
+ p := bytes.TrimRight(s[:i], "\t\n\f\r ")
+ if endsWithCSSKeyword(p, "url") {
+ q := bytes.TrimLeft(s[i+1:], "\t\n\f\r ")
+ switch {
+ case len(q) != 0 && q[0] == '"':
+ c.state, s = stateCSSDqURL, q[1:]
+ case len(q) != 0 && q[0] == '\'':
+ c.state, s = stateCSSSqURL, q[1:]
+
+ default:
+ c.state, s = stateCSSURL, q
+ }
+ return c, s
+ }
+ case '/':
+ if i+1 < len(s) {
+ switch s[i+1] {
+ case '/':
+ c.state = stateCSSLineCmt
+ return c, s[i+2:]
+ case '*':
+ c.state = stateCSSBlockCmt
+ return c, s[i+2:]
+ }
+ }
+ case '"':
+ c.state = stateCSSDqStr
+ return c, s[i+1:]
+ case '\'':
+ c.state = stateCSSSqStr
+ return c, s[i+1:]
+ }
+ s = s[i+1:]
+ }
+ panic("unreachable")
+}
+
+// tCSSStr is the context transition function for the CSS string and URL states.
+func tCSSStr(c context, s []byte) (context, []byte) {
+ if d, t := tSpecialTagEnd(c, s); t != nil {
+ return d, t
+ }
+
+ var endAndEsc string
+ switch c.state {
+ case stateCSSDqStr, stateCSSDqURL:
+ endAndEsc = `\"`
+ case stateCSSSqStr, stateCSSSqURL:
+ endAndEsc = `\'`
+ case stateCSSURL:
+ // Unquoted URLs end with a newline or close parenthesis.
+ // The below includes the wc (whitespace character) and nl.
+ endAndEsc = "\\\t\n\f\r )"
+ default:
+ panic(c.state.String())
+ }
+
+ b := s
+ for {
+ i := bytes.IndexAny(b, endAndEsc)
+ if i == -1 {
+ return tURL(c, decodeCSS(b))
+ }
+ if b[i] == '\\' {
+ i++
+ if i == len(b) {
+ return context{
+ state: stateError,
+ errStr: fmt.Sprintf("unfinished escape sequence in CSS string: %q", s),
+ }, nil
+ }
+ } else {
+ c.state = stateCSS
+ return c, b[i+1:]
+ }
+ c, _ = tURL(c, decodeCSS(b[:i+1]))
+ b = b[i+1:]
+ }
+ panic("unreachable")
+}
+
+// tError is the context transition function for the error state.
+func tError(c context, s []byte) (context, []byte) {
+ return c, nil
+}
+
+// eatAttrName returns the largest j such that s[i:j] is an attribute name.
+// It returns an error if s[i:] does not look like it begins with an
+// attribute name, such as encountering a quote mark without a preceding
+// equals sign.
+func eatAttrName(s []byte, i int) (int, os.Error) {
+ for j := i; j < len(s); j++ {
+ switch s[j] {
+ case ' ', '\t', '\n', '\f', '\r', '=', '>':
+ return j, nil
+ case '\'', '"', '<':
+ // These result in a parse warning in HTML5 and are
+ // indicative of serious problems if seen in an attr
+ // name in a template.
+ return 0, fmt.Errorf("%q in attribute name: %.32q", s[j:j+1], s)
+ default:
+ // No-op.
+ }
+ }
+ return len(s), nil
+}
+
+var elementNameMap = map[string]element{
+ "script": elementScript,
+ "style": elementStyle,
+ "textarea": elementTextarea,
+ "title": elementTitle,
+}
+
+// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
+func eatTagName(s []byte, i int) (int, element) {
+ j := i
+ for ; j < len(s); j++ {
+ x := s[j]
+ if !(('a' <= x && x <= 'z') ||
+ ('A' <= x && x <= 'Z') ||
+ ('0' <= x && x <= '9' && i != j)) {
+ break
+ }
+ }
+ return j, elementNameMap[strings.ToLower(string(s[i:j]))]
+}
+
+// eatWhiteSpace returns the largest j such that s[i:j] is white space.
+func eatWhiteSpace(s []byte, i int) int {
+ for j := i; j < len(s); j++ {
+ switch s[j] {
+ case ' ', '\t', '\n', '\f', '\r':
+ // No-op.
+ default:
+ return j
+ }
+ }
+ return len(s)
+}
+
+// urlAttr is the set of attribute names whose values are URLs.
+// It consists of all "%URI"-typed attributes from
+// http://www.w3.org/TR/html4/index/attributes.html
+// as well as those attributes defined at
+// http://dev.w3.org/html5/spec/index.html#attributes-1
+// whose Value column in that table matches
+// "Valid [non-empty] URL potentially surrounded by spaces".
+var urlAttr = map[string]bool{
+ "action": true,
+ "archive": true,
+ "background": true,
+ "cite": true,
+ "classid": true,
+ "codebase": true,
+ "data": true,
+ "formaction": true,
+ "href": true,
+ "icon": true,
+ "longdesc": true,
+ "manifest": true,
+ "poster": true,
+ "profile": true,
+ "src": true,
+ "usemap": true,
+}