exp/template/html: move transition functions to a separate file

author Mike Samuel <mikesamuel@gmail.com>

Wed, 14 Sep 2011 00:53:55 +0000 (17:53 -0700)

committer Mike Samuel <mikesamuel@gmail.com>

Wed, 14 Sep 2011 00:53:55 +0000 (17:53 -0700)
author Mike Samuel <mikesamuel@gmail.com>
Wed, 14 Sep 2011 00:53:55 +0000 (17:53 -0700)
committer Mike Samuel <mikesamuel@gmail.com>
Wed, 14 Sep 2011 00:53:55 +0000 (17:53 -0700)
diff --git a/src/pkg/exp/template/html/Makefile b/src/pkg/exp/template/html/Makefile

index e4fcee1ab975f2c552fa5715bff5f971b21f724f..620404b5ef02a33357e69bac3ccaa3f6f718b2c1 100644 (file)
--- a/src/pkg/exp/template/html/Makefile
+++ b/src/pkg/exp/template/html/Makefile
@@ -12,6 +12,7 @@ GOFILES=\
         escape.go\
         html.go\
         js.go\
+       transition.go\
         url.go\
  
  include ../../../../Make.pkg
diff --git a/src/pkg/exp/template/html/escape.go b/src/pkg/exp/template/html/escape.go

index a6385fe93da50ee62f90499e2f85b892d73dacf0..22a552134014142c2d4dd15ab87e3fba5ce9ff12 100644 (file)
--- a/src/pkg/exp/template/html/escape.go
+++ b/src/pkg/exp/template/html/escape.go
@@ -12,7 +12,6 @@ import (
         "fmt"
         "html"
         "os"
-       "strings"
         "template"
         "template/parse"
  )
@@ -471,555 +470,3 @@ func (e *escaper) template(name string) *template.Template {
         }
         return t
  }
-
-// transitionFunc is the array of context transition functions for text nodes.
-// A transition function takes a context and template text input, and returns
-// the updated context and any unconsumed text.
-var transitionFunc = [...]func(context, []byte) (context, []byte){
-       stateText:        tText,
-       stateTag:         tTag,
-       stateComment:     tComment,
-       stateRCDATA:      tSpecialTagEnd,
-       stateAttr:        tAttr,
-       stateURL:         tURL,
-       stateJS:          tJS,
-       stateJSDqStr:     tJSStr,
-       stateJSSqStr:     tJSStr,
-       stateJSRegexp:    tJSRegexp,
-       stateJSBlockCmt:  tBlockCmt,
-       stateJSLineCmt:   tLineCmt,
-       stateCSS:         tCSS,
-       stateCSSDqStr:    tCSSStr,
-       stateCSSSqStr:    tCSSStr,
-       stateCSSDqURL:    tCSSStr,
-       stateCSSSqURL:    tCSSStr,
-       stateCSSURL:      tCSSStr,
-       stateCSSBlockCmt: tBlockCmt,
-       stateCSSLineCmt:  tLineCmt,
-       stateError:       tError,
-}
-
-var commentStart = []byte("<!--")
-var commentEnd = []byte("-->")
-
-// tText is the context transition function for the text state.
-func tText(c context, s []byte) (context, []byte) {
-       for {
-               i := bytes.IndexByte(s, '<')
-               if i == -1 || i+1 == len(s) {
-                       return c, nil
-               } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
-                       return context{state: stateComment}, s[i+4:]
-               }
-               i++
-               if s[i] == '/' {
-                       if i+1 == len(s) {
-                               return c, nil
-                       }
-                       i++
-               }
-               j, e := eatTagName(s, i)
-               if j != i {
-                       // We've found an HTML tag.
-                       return context{state: stateTag, element: e}, s[j:]
-               }
-               s = s[j:]
-       }
-       panic("unreachable")
-}
-
-var elementContentType = [...]state{
-       elementNone:     stateText,
-       elementScript:   stateJS,
-       elementStyle:    stateCSS,
-       elementTextarea: stateRCDATA,
-       elementTitle:    stateRCDATA,
-}
-
-// tTag is the context transition function for the tag state.
-func tTag(c context, s []byte) (context, []byte) {
-       // Find the attribute name.
-       attrStart := eatWhiteSpace(s, 0)
-       i, err := eatAttrName(s, attrStart)
-       if err != nil {
-               return context{
-                       state:  stateError,
-                       errStr: err.String(),
-               }, nil
-       }
-       if i == len(s) {
-               return c, nil
-       }
-       state := stateAttr
-       canonAttrName := strings.ToLower(string(s[attrStart:i]))
-       if urlAttr[canonAttrName] {
-               state = stateURL
-       } else if strings.HasPrefix(canonAttrName, "on") {
-               state = stateJS
-       } else if canonAttrName == "style" {
-               state = stateCSS
-       }
-
-       // Look for the start of the value.
-       i = eatWhiteSpace(s, i)
-       if i == len(s) {
-               return c, s[i:]
-       }
-       if s[i] == '>' {
-               state = elementContentType[c.element]
-               return context{state: state, element: c.element}, s[i+1:]
-       } else if s[i] != '=' {
-               // Possible due to a valueless attribute or '/' in "<input />".
-               return c, s[i:]
-       }
-       // Consume the "=".
-       i = eatWhiteSpace(s, i+1)
-
-       // Find the attribute delimiter.
-       delim := delimSpaceOrTagEnd
-       if i < len(s) {
-               switch s[i] {
-               case '\'':
-                       delim, i = delimSingleQuote, i+1
-               case '"':
-                       delim, i = delimDoubleQuote, i+1
-               }
-       }
-
-       return context{state: state, delim: delim, element: c.element}, s[i:]
-}
-
-// tComment is the context transition function for stateComment.
-func tComment(c context, s []byte) (context, []byte) {
-       i := bytes.Index(s, commentEnd)
-       if i != -1 {
-               return context{}, s[i+3:]
-       }
-       return c, nil
-}
-
-// specialTagEndMarkers maps element types to the character sequence that
-// case-insensitively signals the end of the special tag body.
-var specialTagEndMarkers = [...]string{
-       elementScript:   "</script",
-       elementStyle:    "</style",
-       elementTextarea: "</textarea",
-       elementTitle:    "</title",
-}
-
-// tSpecialTagEnd is the context transition function for raw text and RCDATA
-// element states.
-func tSpecialTagEnd(c context, s []byte) (context, []byte) {
-       if c.element != elementNone {
-               end := specialTagEndMarkers[c.element]
-               i := strings.Index(strings.ToLower(string(s)), end)
-               if i != -1 {
-                       return context{state: stateTag}, s[i+len(end):]
-               }
-       }
-       return c, nil
-}
-
-// tAttr is the context transition function for the attribute state.
-func tAttr(c context, s []byte) (context, []byte) {
-       return c, nil
-}
-
-// tURL is the context transition function for the URL state.
-func tURL(c context, s []byte) (context, []byte) {
-       if bytes.IndexAny(s, "#?") >= 0 {
-               c.urlPart = urlPartQueryOrFrag
-       } else if len(s) != 0 && c.urlPart == urlPartNone {
-               c.urlPart = urlPartPreQuery
-       }
-       return c, nil
-}
-
-// tJS is the context transition function for the JS state.
-func tJS(c context, s []byte) (context, []byte) {
-       if d, t := tSpecialTagEnd(c, s); t != nil {
-               return d, t
-       }
-
-       i := bytes.IndexAny(s, `"'/`)
-       if i == -1 {
-               // Entire input is non string, comment, regexp tokens.
-               c.jsCtx = nextJSCtx(s, c.jsCtx)
-               return c, nil
-       }
-       c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
-       switch s[i] {
-       case '"':
-               c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
-       case '\'':
-               c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
-       case '/':
-               switch {
-               case i+1 < len(s) && s[i+1] == '/':
-                       c.state, i = stateJSLineCmt, i+1
-               case i+1 < len(s) && s[i+1] == '*':
-                       c.state, i = stateJSBlockCmt, i+1
-               case c.jsCtx == jsCtxRegexp:
-                       c.state = stateJSRegexp
-               case c.jsCtx == jsCtxDivOp:
-                       c.jsCtx = jsCtxRegexp
-               default:
-                       return context{
-                               state:  stateError,
-                               errStr: fmt.Sprintf("'/' could start div or regexp: %.32q", s[i:]),
-                       }, nil
-               }
-       default:
-               panic("unreachable")
-       }
-       return c, s[i+1:]
-}
-
-// tJSStr is the context transition function for the JS string states.
-func tJSStr(c context, s []byte) (context, []byte) {
-       if d, t := tSpecialTagEnd(c, s); t != nil {
-               return d, t
-       }
-
-       quoteAndEsc := `\"`
-       if c.state == stateJSSqStr {
-               quoteAndEsc = `\'`
-       }
-
-       b := s
-       for {
-               i := bytes.IndexAny(b, quoteAndEsc)
-               if i == -1 {
-                       return c, nil
-               }
-               if b[i] == '\\' {
-                       i++
-                       if i == len(b) {
-                               return context{
-                                       state:  stateError,
-                                       errStr: fmt.Sprintf("unfinished escape sequence in JS string: %q", s),
-                               }, nil
-                       }
-               } else {
-                       c.state, c.jsCtx = stateJS, jsCtxDivOp
-                       return c, b[i+1:]
-               }
-               b = b[i+1:]
-       }
-       panic("unreachable")
-}
-
-// tJSRegexp is the context transition function for the /RegExp/ literal state.
-func tJSRegexp(c context, s []byte) (context, []byte) {
-       if d, t := tSpecialTagEnd(c, s); t != nil {
-               return d, t
-       }
-
-       b := s
-       inCharset := false
-       for {
-               i := bytes.IndexAny(b, `/[\]`)
-               if i == -1 {
-                       break
-               }
-               switch b[i] {
-               case '/':
-                       if !inCharset {
-                               c.state, c.jsCtx = stateJS, jsCtxDivOp
-                               return c, b[i+1:]
-                       }
-               case '\\':
-                       i++
-                       if i == len(b) {
-                               return context{
-                                       state:  stateError,
-                                       errStr: fmt.Sprintf("unfinished escape sequence in JS regexp: %q", s),
-                               }, nil
-                       }
-               case '[':
-                       inCharset = true
-               case ']':
-                       inCharset = false
-               default:
-                       panic("unreachable")
-               }
-               b = b[i+1:]
-       }
-
-       if inCharset {
-               // This can be fixed by making context richer if interpolation
-               // into charsets is desired.
-               return context{
-                       state:  stateError,
-                       errStr: fmt.Sprintf("unfinished JS regexp charset: %q", s),
-               }, nil
-       }
-
-       return c, nil
-}
-
-var blockCommentEnd = []byte("*/")
-
-// tBlockCmt is the context transition function for /*comment*/ states.
-func tBlockCmt(c context, s []byte) (context, []byte) {
-       if d, t := tSpecialTagEnd(c, s); t != nil {
-               return d, t
-       }
-       i := bytes.Index(s, blockCommentEnd)
-       if i == -1 {
-               return c, nil
-       }
-       switch c.state {
-       case stateJSBlockCmt:
-               c.state = stateJS
-       case stateCSSBlockCmt:
-               c.state = stateCSS
-       default:
-               panic(c.state.String())
-       }
-       return c, s[i+2:]
-}
-
-// tLineCmt is the context transition function for //comment states.
-func tLineCmt(c context, s []byte) (context, []byte) {
-       if d, t := tSpecialTagEnd(c, s); t != nil {
-               return d, t
-       }
-       var lineTerminators string
-       var endState state
-       switch c.state {
-       case stateJSLineCmt:
-               lineTerminators, endState = "\n\r\u2028\u2029", stateJS
-       case stateCSSLineCmt:
-               lineTerminators, endState = "\n\f\r", stateCSS
-               // Line comments are not part of any published CSS standard but
-               // are supported by the 4 major browsers.
-               // This defines line comments as
-               //     LINECOMMENT ::= "//" [^\n\f\d]*
-               // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
-               // newlines:
-               //     nl ::= #xA | #xD #xA | #xD | #xC
-       default:
-               panic(c.state.String())
-       }
-
-       i := bytes.IndexAny(s, lineTerminators)
-       if i == -1 {
-               return c, nil
-       }
-       c.state = endState
-       // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
-       // "However, the LineTerminator at the end of the line is not
-       // considered to be part of the single-line comment; it is recognised
-       // separately by the lexical grammar and becomes part of the stream of
-       // input elements for the syntactic grammar."
-       return c, s[i:]
-}
-
-// tCSS is the context transition function for the CSS state.
-func tCSS(c context, s []byte) (context, []byte) {
-       if d, t := tSpecialTagEnd(c, s); t != nil {
-               return d, t
-       }
-
-       // CSS quoted strings are almost never used except for:
-       // (1) URLs as in background: "/foo.png"
-       // (2) Multiword font-names as in font-family: "Times New Roman"
-       // (3) List separators in content values as in inline-lists:
-       //    <style>
-       //    ul.inlineList { list-style: none; padding:0 }
-       //    ul.inlineList > li { display: inline }
-       //    ul.inlineList > li:before { content: ", " }
-       //    ul.inlineList > li:first-child:before { content: "" }
-       //    </style>
-       //    <ul class=inlineList><li>One<li>Two<li>Three</ul>
-       // (4) Attribute value selectors as in a[href="http://example.com/"]
-       //
-       // We conservatively treat all strings as URLs, but make some
-       // allowances to avoid confusion.
-       //
-       // In (1), our conservative assumption is justified.
-       // In (2), valid font names do not contain ':', '?', or '#', so our
-       // conservative assumption is fine since we will never transition past
-       // urlPartPreQuery.
-       // In (3), our protocol heuristic should not be tripped, and there
-       // should not be non-space content after a '?' or '#', so as long as
-       // we only %-encode RFC 3986 reserved characters we are ok.
-       // In (4), we should URL escape for URL attributes, and for others we
-       // have the attribute name available if our conservative assumption
-       // proves problematic for real code.
-
-       for {
-               i := bytes.IndexAny(s, `("'/`)
-               if i == -1 {
-                       return c, nil
-               }
-               switch s[i] {
-               case '(':
-                       // Look for url to the left.
-                       p := bytes.TrimRight(s[:i], "\t\n\f\r ")
-                       if endsWithCSSKeyword(p, "url") {
-                               q := bytes.TrimLeft(s[i+1:], "\t\n\f\r ")
-                               switch {
-                               case len(q) != 0 && q[0] == '"':
-                                       c.state, s = stateCSSDqURL, q[1:]
-                               case len(q) != 0 && q[0] == '\'':
-                                       c.state, s = stateCSSSqURL, q[1:]
-
-                               default:
-                                       c.state, s = stateCSSURL, q
-                               }
-                               return c, s
-                       }
-               case '/':
-                       if i+1 < len(s) {
-                               switch s[i+1] {
-                               case '/':
-                                       c.state = stateCSSLineCmt
-                                       return c, s[i+2:]
-                               case '*':
-                                       c.state = stateCSSBlockCmt
-                                       return c, s[i+2:]
-                               }
-                       }
-               case '"':
-                       c.state = stateCSSDqStr
-                       return c, s[i+1:]
-               case '\'':
-                       c.state = stateCSSSqStr
-                       return c, s[i+1:]
-               }
-               s = s[i+1:]
-       }
-       panic("unreachable")
-}
-
-// tCSSStr is the context transition function for the CSS string and URL states.
-func tCSSStr(c context, s []byte) (context, []byte) {
-       if d, t := tSpecialTagEnd(c, s); t != nil {
-               return d, t
-       }
-
-       var endAndEsc string
-       switch c.state {
-       case stateCSSDqStr, stateCSSDqURL:
-               endAndEsc = `\"`
-       case stateCSSSqStr, stateCSSSqURL:
-               endAndEsc = `\'`
-       case stateCSSURL:
-               // Unquoted URLs end with a newline or close parenthesis.
-               // The below includes the wc (whitespace character) and nl.
-               endAndEsc = "\\\t\n\f\r )"
-       default:
-               panic(c.state.String())
-       }
-
-       b := s
-       for {
-               i := bytes.IndexAny(b, endAndEsc)
-               if i == -1 {
-                       return tURL(c, decodeCSS(b))
-               }
-               if b[i] == '\\' {
-                       i++
-                       if i == len(b) {
-                               return context{
-                                       state:  stateError,
-                                       errStr: fmt.Sprintf("unfinished escape sequence in CSS string: %q", s),
-                               }, nil
-                       }
-               } else {
-                       c.state = stateCSS
-                       return c, b[i+1:]
-               }
-               c, _ = tURL(c, decodeCSS(b[:i+1]))
-               b = b[i+1:]
-       }
-       panic("unreachable")
-}
-
-// tError is the context transition function for the error state.
-func tError(c context, s []byte) (context, []byte) {
-       return c, nil
-}
-
-// eatAttrName returns the largest j such that s[i:j] is an attribute name.
-// It returns an error if s[i:] does not look like it begins with an
-// attribute name, such as encountering a quote mark without a preceding
-// equals sign.
-func eatAttrName(s []byte, i int) (int, os.Error) {
-       for j := i; j < len(s); j++ {
-               switch s[j] {
-               case ' ', '\t', '\n', '\f', '\r', '=', '>':
-                       return j, nil
-               case '\'', '"', '<':
-                       // These result in a parse warning in HTML5 and are
-                       // indicative of serious problems if seen in an attr
-                       // name in a template.
-                       return 0, fmt.Errorf("%q in attribute name: %.32q", s[j:j+1], s)
-               default:
-                       // No-op.
-               }
-       }
-       return len(s), nil
-}
-
-var elementNameMap = map[string]element{
-       "script":   elementScript,
-       "style":    elementStyle,
-       "textarea": elementTextarea,
-       "title":    elementTitle,
-}
-
-// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
-func eatTagName(s []byte, i int) (int, element) {
-       j := i
-       for ; j < len(s); j++ {
-               x := s[j]
-               if !(('a' <= x && x <= 'z') ||
-                       ('A' <= x && x <= 'Z') ||
-                       ('0' <= x && x <= '9' && i != j)) {
-                       break
-               }
-       }
-       return j, elementNameMap[strings.ToLower(string(s[i:j]))]
-}
-
-// eatWhiteSpace returns the largest j such that s[i:j] is white space.
-func eatWhiteSpace(s []byte, i int) int {
-       for j := i; j < len(s); j++ {
-               switch s[j] {
-               case ' ', '\t', '\n', '\f', '\r':
-                       // No-op.
-               default:
-                       return j
-               }
-       }
-       return len(s)
-}
-
-// urlAttr is the set of attribute names whose values are URLs.
-// It consists of all "%URI"-typed attributes from
-// http://www.w3.org/TR/html4/index/attributes.html
-// as well as those attributes defined at
-// http://dev.w3.org/html5/spec/index.html#attributes-1
-// whose Value column in that table matches
-// "Valid [non-empty] URL potentially surrounded by spaces".
-var urlAttr = map[string]bool{
-       "action":     true,
-       "archive":    true,
-       "background": true,
-       "cite":       true,
-       "classid":    true,
-       "codebase":   true,
-       "data":       true,
-       "formaction": true,
-       "href":       true,
-       "icon":       true,
-       "longdesc":   true,
-       "manifest":   true,
-       "poster":     true,
-       "profile":    true,
-       "src":        true,
-       "usemap":     true,
-}
diff --git a/src/pkg/exp/template/html/transition.go b/src/pkg/exp/template/html/transition.go

new file mode 100644 (file)

index 0000000..117b20a
--- /dev/null
+++ b/src/pkg/exp/template/html/transition.go
@@ -0,0 +1,564 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "bytes"
+       "fmt"
+       "os"
+       "strings"
+)
+
+// transitionFunc is the array of context transition functions for text nodes.
+// A transition function takes a context and template text input, and returns
+// the updated context and any unconsumed text.
+var transitionFunc = [...]func(context, []byte) (context, []byte){
+       stateText:        tText,
+       stateTag:         tTag,
+       stateComment:     tComment,
+       stateRCDATA:      tSpecialTagEnd,
+       stateAttr:        tAttr,
+       stateURL:         tURL,
+       stateJS:          tJS,
+       stateJSDqStr:     tJSStr,
+       stateJSSqStr:     tJSStr,
+       stateJSRegexp:    tJSRegexp,
+       stateJSBlockCmt:  tBlockCmt,
+       stateJSLineCmt:   tLineCmt,
+       stateCSS:         tCSS,
+       stateCSSDqStr:    tCSSStr,
+       stateCSSSqStr:    tCSSStr,
+       stateCSSDqURL:    tCSSStr,
+       stateCSSSqURL:    tCSSStr,
+       stateCSSURL:      tCSSStr,
+       stateCSSBlockCmt: tBlockCmt,
+       stateCSSLineCmt:  tLineCmt,
+       stateError:       tError,
+}
+
+var commentStart = []byte("<!--")
+var commentEnd = []byte("-->")
+
+// tText is the context transition function for the text state.
+func tText(c context, s []byte) (context, []byte) {
+       for {
+               i := bytes.IndexByte(s, '<')
+               if i == -1 || i+1 == len(s) {
+                       return c, nil
+               } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
+                       return context{state: stateComment}, s[i+4:]
+               }
+               i++
+               if s[i] == '/' {
+                       if i+1 == len(s) {
+                               return c, nil
+                       }
+                       i++
+               }
+               j, e := eatTagName(s, i)
+               if j != i {
+                       // We've found an HTML tag.
+                       return context{state: stateTag, element: e}, s[j:]
+               }
+               s = s[j:]
+       }
+       panic("unreachable")
+}
+
+var elementContentType = [...]state{
+       elementNone:     stateText,
+       elementScript:   stateJS,
+       elementStyle:    stateCSS,
+       elementTextarea: stateRCDATA,
+       elementTitle:    stateRCDATA,
+}
+
+// tTag is the context transition function for the tag state.
+func tTag(c context, s []byte) (context, []byte) {
+       // Find the attribute name.
+       attrStart := eatWhiteSpace(s, 0)
+       i, err := eatAttrName(s, attrStart)
+       if err != nil {
+               return context{
+                       state:  stateError,
+                       errStr: err.String(),
+               }, nil
+       }
+       if i == len(s) {
+               return c, nil
+       }
+       state := stateAttr
+       canonAttrName := strings.ToLower(string(s[attrStart:i]))
+       if urlAttr[canonAttrName] {
+               state = stateURL
+       } else if strings.HasPrefix(canonAttrName, "on") {
+               state = stateJS
+       } else if canonAttrName == "style" {
+               state = stateCSS
+       }
+
+       // Look for the start of the value.
+       i = eatWhiteSpace(s, i)
+       if i == len(s) {
+               return c, s[i:]
+       }
+       if s[i] == '>' {
+               state = elementContentType[c.element]
+               return context{state: state, element: c.element}, s[i+1:]
+       } else if s[i] != '=' {
+               // Possible due to a valueless attribute or '/' in "<input />".
+               return c, s[i:]
+       }
+       // Consume the "=".
+       i = eatWhiteSpace(s, i+1)
+
+       // Find the attribute delimiter.
+       delim := delimSpaceOrTagEnd
+       if i < len(s) {
+               switch s[i] {
+               case '\'':
+                       delim, i = delimSingleQuote, i+1
+               case '"':
+                       delim, i = delimDoubleQuote, i+1
+               }
+       }
+
+       return context{state: state, delim: delim, element: c.element}, s[i:]
+}
+
+// tComment is the context transition function for stateComment.
+func tComment(c context, s []byte) (context, []byte) {
+       i := bytes.Index(s, commentEnd)
+       if i != -1 {
+               return context{}, s[i+3:]
+       }
+       return c, nil
+}
+
+// specialTagEndMarkers maps element types to the character sequence that
+// case-insensitively signals the end of the special tag body.
+var specialTagEndMarkers = [...]string{
+       elementScript:   "</script",
+       elementStyle:    "</style",
+       elementTextarea: "</textarea",
+       elementTitle:    "</title",
+}
+
+// tSpecialTagEnd is the context transition function for raw text and RCDATA
+// element states.
+func tSpecialTagEnd(c context, s []byte) (context, []byte) {
+       if c.element != elementNone {
+               end := specialTagEndMarkers[c.element]
+               i := strings.Index(strings.ToLower(string(s)), end)
+               if i != -1 {
+                       return context{state: stateTag}, s[i+len(end):]
+               }
+       }
+       return c, nil
+}
+
+// tAttr is the context transition function for the attribute state.
+func tAttr(c context, s []byte) (context, []byte) {
+       return c, nil
+}
+
+// tURL is the context transition function for the URL state.
+func tURL(c context, s []byte) (context, []byte) {
+       if bytes.IndexAny(s, "#?") >= 0 {
+               c.urlPart = urlPartQueryOrFrag
+       } else if len(s) != 0 && c.urlPart == urlPartNone {
+               c.urlPart = urlPartPreQuery
+       }
+       return c, nil
+}
+
+// tJS is the context transition function for the JS state.
+func tJS(c context, s []byte) (context, []byte) {
+       if d, t := tSpecialTagEnd(c, s); t != nil {
+               return d, t
+       }
+
+       i := bytes.IndexAny(s, `"'/`)
+       if i == -1 {
+               // Entire input is non string, comment, regexp tokens.
+               c.jsCtx = nextJSCtx(s, c.jsCtx)
+               return c, nil
+       }
+       c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
+       switch s[i] {
+       case '"':
+               c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
+       case '\'':
+               c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
+       case '/':
+               switch {
+               case i+1 < len(s) && s[i+1] == '/':
+                       c.state, i = stateJSLineCmt, i+1
+               case i+1 < len(s) && s[i+1] == '*':
+                       c.state, i = stateJSBlockCmt, i+1
+               case c.jsCtx == jsCtxRegexp:
+                       c.state = stateJSRegexp
+               case c.jsCtx == jsCtxDivOp:
+                       c.jsCtx = jsCtxRegexp
+               default:
+                       return context{
+                               state:  stateError,
+                               errStr: fmt.Sprintf("'/' could start div or regexp: %.32q", s[i:]),
+                       }, nil
+               }
+       default:
+               panic("unreachable")
+       }
+       return c, s[i+1:]
+}
+
+// tJSStr is the context transition function for the JS string states.
+func tJSStr(c context, s []byte) (context, []byte) {
+       if d, t := tSpecialTagEnd(c, s); t != nil {
+               return d, t
+       }
+
+       quoteAndEsc := `\"`
+       if c.state == stateJSSqStr {
+               quoteAndEsc = `\'`
+       }
+
+       b := s
+       for {
+               i := bytes.IndexAny(b, quoteAndEsc)
+               if i == -1 {
+                       return c, nil
+               }
+               if b[i] == '\\' {
+                       i++
+                       if i == len(b) {
+                               return context{
+                                       state:  stateError,
+                                       errStr: fmt.Sprintf("unfinished escape sequence in JS string: %q", s),
+                               }, nil
+                       }
+               } else {
+                       c.state, c.jsCtx = stateJS, jsCtxDivOp
+                       return c, b[i+1:]
+               }
+               b = b[i+1:]
+       }
+       panic("unreachable")
+}
+
+// tJSRegexp is the context transition function for the /RegExp/ literal state.
+func tJSRegexp(c context, s []byte) (context, []byte) {
+       if d, t := tSpecialTagEnd(c, s); t != nil {
+               return d, t
+       }
+
+       b := s
+       inCharset := false
+       for {
+               i := bytes.IndexAny(b, `/[\]`)
+               if i == -1 {
+                       break
+               }
+               switch b[i] {
+               case '/':
+                       if !inCharset {
+                               c.state, c.jsCtx = stateJS, jsCtxDivOp
+                               return c, b[i+1:]
+                       }
+               case '\\':
+                       i++
+                       if i == len(b) {
+                               return context{
+                                       state:  stateError,
+                                       errStr: fmt.Sprintf("unfinished escape sequence in JS regexp: %q", s),
+                               }, nil
+                       }
+               case '[':
+                       inCharset = true
+               case ']':
+                       inCharset = false
+               default:
+                       panic("unreachable")
+               }
+               b = b[i+1:]
+       }
+
+       if inCharset {
+               // This can be fixed by making context richer if interpolation
+               // into charsets is desired.
+               return context{
+                       state:  stateError,
+                       errStr: fmt.Sprintf("unfinished JS regexp charset: %q", s),
+               }, nil
+       }
+
+       return c, nil
+}
+
+var blockCommentEnd = []byte("*/")
+
+// tBlockCmt is the context transition function for /*comment*/ states.
+func tBlockCmt(c context, s []byte) (context, []byte) {
+       if d, t := tSpecialTagEnd(c, s); t != nil {
+               return d, t
+       }
+       i := bytes.Index(s, blockCommentEnd)
+       if i == -1 {
+               return c, nil
+       }
+       switch c.state {
+       case stateJSBlockCmt:
+               c.state = stateJS
+       case stateCSSBlockCmt:
+               c.state = stateCSS
+       default:
+               panic(c.state.String())
+       }
+       return c, s[i+2:]
+}
+
+// tLineCmt is the context transition function for //comment states.
+func tLineCmt(c context, s []byte) (context, []byte) {
+       if d, t := tSpecialTagEnd(c, s); t != nil {
+               return d, t
+       }
+       var lineTerminators string
+       var endState state
+       switch c.state {
+       case stateJSLineCmt:
+               lineTerminators, endState = "\n\r\u2028\u2029", stateJS
+       case stateCSSLineCmt:
+               lineTerminators, endState = "\n\f\r", stateCSS
+               // Line comments are not part of any published CSS standard but
+               // are supported by the 4 major browsers.
+               // This defines line comments as
+               //     LINECOMMENT ::= "//" [^\n\f\d]*
+               // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
+               // newlines:
+               //     nl ::= #xA | #xD #xA | #xD | #xC
+       default:
+               panic(c.state.String())
+       }
+
+       i := bytes.IndexAny(s, lineTerminators)
+       if i == -1 {
+               return c, nil
+       }
+       c.state = endState
+       // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
+       // "However, the LineTerminator at the end of the line is not
+       // considered to be part of the single-line comment; it is recognised
+       // separately by the lexical grammar and becomes part of the stream of
+       // input elements for the syntactic grammar."
+       return c, s[i:]
+}
+
+// tCSS is the context transition function for the CSS state.
+func tCSS(c context, s []byte) (context, []byte) {
+       if d, t := tSpecialTagEnd(c, s); t != nil {
+               return d, t
+       }
+
+       // CSS quoted strings are almost never used except for:
+       // (1) URLs as in background: "/foo.png"
+       // (2) Multiword font-names as in font-family: "Times New Roman"
+       // (3) List separators in content values as in inline-lists:
+       //    <style>
+       //    ul.inlineList { list-style: none; padding:0 }
+       //    ul.inlineList > li { display: inline }
+       //    ul.inlineList > li:before { content: ", " }
+       //    ul.inlineList > li:first-child:before { content: "" }
+       //    </style>
+       //    <ul class=inlineList><li>One<li>Two<li>Three</ul>
+       // (4) Attribute value selectors as in a[href="http://example.com/"]
+       //
+       // We conservatively treat all strings as URLs, but make some
+       // allowances to avoid confusion.
+       //
+       // In (1), our conservative assumption is justified.
+       // In (2), valid font names do not contain ':', '?', or '#', so our
+       // conservative assumption is fine since we will never transition past
+       // urlPartPreQuery.
+       // In (3), our protocol heuristic should not be tripped, and there
+       // should not be non-space content after a '?' or '#', so as long as
+       // we only %-encode RFC 3986 reserved characters we are ok.
+       // In (4), we should URL escape for URL attributes, and for others we
+       // have the attribute name available if our conservative assumption
+       // proves problematic for real code.
+
+       for {
+               i := bytes.IndexAny(s, `("'/`)
+               if i == -1 {
+                       return c, nil
+               }
+               switch s[i] {
+               case '(':
+                       // Look for url to the left.
+                       p := bytes.TrimRight(s[:i], "\t\n\f\r ")
+                       if endsWithCSSKeyword(p, "url") {
+                               q := bytes.TrimLeft(s[i+1:], "\t\n\f\r ")
+                               switch {
+                               case len(q) != 0 && q[0] == '"':
+                                       c.state, s = stateCSSDqURL, q[1:]
+                               case len(q) != 0 && q[0] == '\'':
+                                       c.state, s = stateCSSSqURL, q[1:]
+
+                               default:
+                                       c.state, s = stateCSSURL, q
+                               }
+                               return c, s
+                       }
+               case '/':
+                       if i+1 < len(s) {
+                               switch s[i+1] {
+                               case '/':
+                                       c.state = stateCSSLineCmt
+                                       return c, s[i+2:]
+                               case '*':
+                                       c.state = stateCSSBlockCmt
+                                       return c, s[i+2:]
+                               }
+                       }
+               case '"':
+                       c.state = stateCSSDqStr
+                       return c, s[i+1:]
+               case '\'':
+                       c.state = stateCSSSqStr
+                       return c, s[i+1:]
+               }
+               s = s[i+1:]
+       }
+       panic("unreachable")
+}
+
+// tCSSStr is the context transition function for the CSS string and URL states.
+func tCSSStr(c context, s []byte) (context, []byte) {
+       if d, t := tSpecialTagEnd(c, s); t != nil {
+               return d, t
+       }
+
+       var endAndEsc string
+       switch c.state {
+       case stateCSSDqStr, stateCSSDqURL:
+               endAndEsc = `\"`
+       case stateCSSSqStr, stateCSSSqURL:
+               endAndEsc = `\'`
+       case stateCSSURL:
+               // Unquoted URLs end with a newline or close parenthesis.
+               // The below includes the wc (whitespace character) and nl.
+               endAndEsc = "\\\t\n\f\r )"
+       default:
+               panic(c.state.String())
+       }
+
+       b := s
+       for {
+               i := bytes.IndexAny(b, endAndEsc)
+               if i == -1 {
+                       return tURL(c, decodeCSS(b))
+               }
+               if b[i] == '\\' {
+                       i++
+                       if i == len(b) {
+                               return context{
+                                       state:  stateError,
+                                       errStr: fmt.Sprintf("unfinished escape sequence in CSS string: %q", s),
+                               }, nil
+                       }
+               } else {
+                       c.state = stateCSS
+                       return c, b[i+1:]
+               }
+               c, _ = tURL(c, decodeCSS(b[:i+1]))
+               b = b[i+1:]
+       }
+       panic("unreachable")
+}
+
+// tError is the context transition function for the error state.
+func tError(c context, s []byte) (context, []byte) {
+       return c, nil
+}
+
+// eatAttrName returns the largest j such that s[i:j] is an attribute name.
+// It returns an error if s[i:] does not look like it begins with an
+// attribute name, such as encountering a quote mark without a preceding
+// equals sign.
+func eatAttrName(s []byte, i int) (int, os.Error) {
+       for j := i; j < len(s); j++ {
+               switch s[j] {
+               case ' ', '\t', '\n', '\f', '\r', '=', '>':
+                       return j, nil
+               case '\'', '"', '<':
+                       // These result in a parse warning in HTML5 and are
+                       // indicative of serious problems if seen in an attr
+                       // name in a template.
+                       return 0, fmt.Errorf("%q in attribute name: %.32q", s[j:j+1], s)
+               default:
+                       // No-op.
+               }
+       }
+       return len(s), nil
+}
+
+var elementNameMap = map[string]element{
+       "script":   elementScript,
+       "style":    elementStyle,
+       "textarea": elementTextarea,
+       "title":    elementTitle,
+}
+
+// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
+func eatTagName(s []byte, i int) (int, element) {
+       j := i
+       for ; j < len(s); j++ {
+               x := s[j]
+               if !(('a' <= x && x <= 'z') ||
+                       ('A' <= x && x <= 'Z') ||
+                       ('0' <= x && x <= '9' && i != j)) {
+                       break
+               }
+       }
+       return j, elementNameMap[strings.ToLower(string(s[i:j]))]
+}
+
+// eatWhiteSpace returns the largest j such that s[i:j] is white space.
+func eatWhiteSpace(s []byte, i int) int {
+       for j := i; j < len(s); j++ {
+               switch s[j] {
+               case ' ', '\t', '\n', '\f', '\r':
+                       // No-op.
+               default:
+                       return j
+               }
+       }
+       return len(s)
+}
+
+// urlAttr is the set of attribute names whose values are URLs.
+// It consists of all "%URI"-typed attributes from
+// http://www.w3.org/TR/html4/index/attributes.html
+// as well as those attributes defined at
+// http://dev.w3.org/html5/spec/index.html#attributes-1
+// whose Value column in that table matches
+// "Valid [non-empty] URL potentially surrounded by spaces".
+var urlAttr = map[string]bool{
+       "action":     true,
+       "archive":    true,
+       "background": true,
+       "cite":       true,
+       "classid":    true,
+       "codebase":   true,
+       "data":       true,
+       "formaction": true,
+       "href":       true,
+       "icon":       true,
+       "longdesc":   true,
+       "manifest":   true,
+       "poster":     true,
+       "profile":    true,
+       "src":        true,
+       "usemap":     true,
+}
author	Mike Samuel <mikesamuel@gmail.com>
	Wed, 14 Sep 2011 00:53:55 +0000 (17:53 -0700)
committer	Mike Samuel <mikesamuel@gmail.com>
	Wed, 14 Sep 2011 00:53:55 +0000 (17:53 -0700)
src/pkg/exp/template/html/Makefile		patch \| blob \| history
src/pkg/exp/template/html/escape.go		patch \| blob \| history
src/pkg/exp/template/html/transition.go	[new file with mode: 0644]	patch \| blob