exp/template/html: Implement grammar for JS.

author Mike Samuel <mikesamuel@gmail.com>

Thu, 1 Sep 2011 02:03:40 +0000 (12:03 +1000)

committer Nigel Tao <nigeltao@golang.org>

Thu, 1 Sep 2011 02:03:40 +0000 (12:03 +1000)
author Mike Samuel <mikesamuel@gmail.com>
Thu, 1 Sep 2011 02:03:40 +0000 (12:03 +1000)
committer Nigel Tao <nigeltao@golang.org>
Thu, 1 Sep 2011 02:03:40 +0000 (12:03 +1000)
diff --git a/src/pkg/exp/template/html/Makefile b/src/pkg/exp/template/html/Makefile

index 6d8ff5cd14a539fb8dbb852accdb2a1471076b65..3a93bebc091e37728df87cc33db02767d4ceb5d7 100644 (file)
--- a/src/pkg/exp/template/html/Makefile
+++ b/src/pkg/exp/template/html/Makefile
@@ -8,5 +8,6 @@ TARG=exp/template/html
  GOFILES=\
         context.go\
         escape.go\
+       js.go\
  
  include ../../../../Make.pkg
diff --git a/src/pkg/exp/template/html/context.go b/src/pkg/exp/template/html/context.go

index d8fed158677402bf2e7044c931df954456c331de..428b3d0b3af785261a5991fed7bd6cfa9c0f5c6f 100644 (file)
--- a/src/pkg/exp/template/html/context.go
+++ b/src/pkg/exp/template/html/context.go
@@ -19,13 +19,14 @@ type context struct {
         state   state
         delim   delim
         urlPart urlPart
+       jsCtx   jsCtx
         errLine int
         errStr  string
  }
  
  // eq returns whether two contexts are equal.
  func (c context) eq(d context) bool {
-       return c.state == d.state && c.delim == d.delim && c.urlPart == d.urlPart && c.errLine == d.errLine && c.errStr == d.errStr
+       return c.state == d.state && c.delim == d.delim && c.urlPart == d.urlPart && c.jsCtx == d.jsCtx && c.errLine == d.errLine && c.errStr == d.errStr
  }
  
  // state describes a high-level HTML parser state.
@@ -50,17 +51,35 @@ const (
         stateAttr
         // stateURL occurs inside an HTML attribute whose content is a URL.
         stateURL
+       // stateJS occurs inside an event handler or script element.
+       stateJS
+       // stateJSDqStr occurs inside a JavaScript double quoted string.
+       stateJSDqStr
+       // stateJSSqStr occurs inside a JavaScript single quoted string.
+       stateJSSqStr
+       // stateJSRegexp occurs inside a JavaScript regexp literal.
+       stateJSRegexp
+       // stateJSBlockCmt occurs inside a JavaScript /* block comment */.
+       stateJSBlockCmt
+       // stateJSLineCmt occurs inside a JavaScript // line comment.
+       stateJSLineCmt
         // stateError is an infectious error state outside any valid
         // HTML/CSS/JS construct.
         stateError
  )
  
  var stateNames = [...]string{
-       stateText:  "stateText",
-       stateTag:   "stateTag",
-       stateAttr:  "stateAttr",
-       stateURL:   "stateURL",
-       stateError: "stateError",
+       stateText:       "stateText",
+       stateTag:        "stateTag",
+       stateAttr:       "stateAttr",
+       stateURL:        "stateURL",
+       stateJS:         "stateJS",
+       stateJSDqStr:    "stateJSDqStr",
+       stateJSSqStr:    "stateJSSqStr",
+       stateJSRegexp:   "stateJSRegexp",
+       stateJSBlockCmt: "stateJSBlockCmt",
+       stateJSLineCmt:  "stateJSLineCmt",
+       stateError:      "stateError",
  }
  
  func (s state) String() string {
@@ -131,3 +150,24 @@ func (u urlPart) String() string {
         }
         return fmt.Sprintf("illegal urlPart %d", u)
  }
+
+// jsCtx determines whether a '/' starts a regular expression literal or a
+// division operator.
+type jsCtx uint8
+
+const (
+       // jsCtxRegexp occurs where a '/' would start a regexp literal.
+       jsCtxRegexp jsCtx = iota
+       // jsCtxDivOp occurs where a '/' would start a division operator.
+       jsCtxDivOp
+)
+
+func (c jsCtx) String() string {
+       switch c {
+       case jsCtxRegexp:
+               return "jsCtxRegexp"
+       case jsCtxDivOp:
+               return "jsCtxDivOp"
+       }
+       return fmt.Sprintf("illegal jsCtx %d", c)
+}
diff --git a/src/pkg/exp/template/html/escape.go b/src/pkg/exp/template/html/escape.go

index e7de81c4c6860910c7a210b06826bdfe1a94e7f9..0eb8dfec8d523fea282cdfa66c708bdec459c4d6 100644 (file)
--- a/src/pkg/exp/template/html/escape.go
+++ b/src/pkg/exp/template/html/escape.go
@@ -33,7 +33,10 @@ func Escape(t *template.Template) (*template.Template, os.Error) {
  
  // funcMap maps command names to functions that render their inputs safe.
  var funcMap = template.FuncMap{
-       "exp_template_html_urlfilter": urlFilter,
+       "exp_template_html_urlfilter":       urlFilter,
+       "exp_template_html_jsvalescaper":    jsValEscaper,
+       "exp_template_html_jsstrescaper":    jsStrEscaper,
+       "exp_template_html_jsregexpescaper": jsRegexpEscaper,
  }
  
  // escape escapes a template node.
@@ -58,15 +61,16 @@ func escape(c context, n parse.Node) context {
  
  // escapeAction escapes an action template node.
  func escapeAction(c context, n *parse.ActionNode) context {
-       sanitizer := "html"
-       if c.state == stateURL {
+       s := make([]string, 0, 2)
+       switch c.state {
+       case stateURL:
                 switch c.urlPart {
                 case urlPartNone:
-                       sanitizer = "exp_template_html_urlfilter"
+                       s = append(s, "exp_template_html_urlfilter")
                 case urlPartQueryOrFrag:
-                       sanitizer = "urlquery"
+                       s = append(s, "urlquery")
                 case urlPartPreQuery:
-                       // The default "html" works here.
+                       s = append(s, "html")
                 case urlPartUnknown:
                         return context{
                                 state:   stateError,
@@ -76,21 +80,94 @@ func escapeAction(c context, n *parse.ActionNode) context {
                 default:
                         panic(c.urlPart.String())
                 }
+       case stateJS:
+               s = append(s, "exp_template_html_jsvalescaper")
+               if c.delim != delimNone {
+                       s = append(s, "html")
+               }
+       case stateJSDqStr, stateJSSqStr:
+               s = append(s, "exp_template_html_jsstrescaper")
+       case stateJSRegexp:
+               s = append(s, "exp_template_html_jsregexpescaper")
+       case stateJSBlockCmt, stateJSLineCmt:
+               return context{
+                       state:   stateError,
+                       errLine: n.Line,
+                       errStr:  fmt.Sprintf("%s appears inside a comment", n),
+               }
+       default:
+               s = append(s, "html")
+       }
+       ensurePipelineContains(n.Pipe, s)
+       return c
+}
+
+// ensurePipelineContains ensures that the pipeline has commands with
+// the identifiers in s in order.
+// If the pipeline already has some of the sanitizers, do not interfere.
+// For example, if p is (.X | html) and s is ["escapeJSVal", "html"] then it
+// has one matching, "html", and one to insert, "escapeJSVal", to produce
+// (.X | escapeJSVal | html).
+func ensurePipelineContains(p *parse.PipeNode, s []string) {
+       if len(s) == 0 {
+               return
+       }
+       n := len(p.Cmds)
+       // Find the identifiers at the end of the command chain.
+       idents := p.Cmds
+       for i := n - 1; i >= 0; i-- {
+               if cmd := p.Cmds[i]; len(cmd.Args) != 0 {
+                       if _, ok := cmd.Args[0].(*parse.IdentifierNode); ok {
+                               continue
+                       }
+               }
+               idents = p.Cmds[i+1:]
         }
-       // If the pipe already ends with the sanitizer, do not interfere.
-       if m := len(n.Pipe.Cmds); m != 0 {
-               if last := n.Pipe.Cmds[m-1]; len(last.Args) != 0 {
-                       if i, ok := last.Args[0].(*parse.IdentifierNode); ok && i.Ident == sanitizer {
-                               return c
+       dups := 0
+       for _, id := range idents {
+               if s[dups] == (id.Args[0].(*parse.IdentifierNode)).Ident {
+                       dups++
+                       if dups == len(s) {
+                               return
                         }
                 }
         }
-       // Otherwise, append the sanitizer.
-       n.Pipe.Cmds = append(n.Pipe.Cmds, &parse.CommandNode{
+       newCmds := make([]*parse.CommandNode, n-len(idents), n+len(s)-dups)
+       copy(newCmds, p.Cmds)
+       // Merge existing identifier commands with the sanitizers needed.
+       for _, id := range idents {
+               i := indexOfStr((id.Args[0].(*parse.IdentifierNode)).Ident, s)
+               if i != -1 {
+                       for _, name := range s[:i] {
+                               newCmds = append(newCmds, newIdentCmd(name))
+                       }
+                       s = s[i+1:]
+               }
+               newCmds = append(newCmds, id)
+       }
+       // Create any remaining sanitizers.
+       for _, name := range s {
+               newCmds = append(newCmds, newIdentCmd(name))
+       }
+       p.Cmds = newCmds
+}
+
+// indexOfStr returns the least i such that strs[i] == s, or -1 if s is not in strs.
+func indexOfStr(s string, strs []string) int {
+       for i, t := range strs {
+               if s == t {
+                       return i
+               }
+       }
+       return -1
+}
+
+// newIdentCmd produces a command containing a single identifier node.
+func newIdentCmd(identifier string) *parse.CommandNode {
+       return &parse.CommandNode{
                 NodeType: parse.NodeCommand,
-               Args:     []parse.Node{parse.NewIdentifier(sanitizer)},
-       })
-       return c
+               Args:     []parse.Node{parse.NewIdentifier(identifier)},
+       }
  }
  
  // join joins the two contexts of a branch template node. The result is an
@@ -203,11 +280,17 @@ func escapeText(c context, s []byte) context {
  // A transition function takes a context and template text input, and returns
  // the updated context and any unconsumed text.
  var transitionFunc = [...]func(context, []byte) (context, []byte){
-       stateText:  tText,
-       stateTag:   tTag,
-       stateURL:   tURL,
-       stateAttr:  tAttr,
-       stateError: tError,
+       stateText:       tText,
+       stateTag:        tTag,
+       stateURL:        tURL,
+       stateJS:         tJS,
+       stateJSDqStr:    tJSStr,
+       stateJSSqStr:    tJSStr,
+       stateJSRegexp:   tJSRegexp,
+       stateJSBlockCmt: tJSBlockCmt,
+       stateJSLineCmt:  tJSLineCmt,
+       stateAttr:       tAttr,
+       stateError:      tError,
  }
  
  // tText is the context transition function for the text state.
@@ -249,8 +332,11 @@ func tTag(c context, s []byte) (context, []byte) {
                 return context{state: stateTag}, nil
         }
         state := stateAttr
-       if urlAttr[strings.ToLower(string(s[attrStart:i]))] {
+       canonAttrName := strings.ToLower(string(s[attrStart:i]))
+       if urlAttr[canonAttrName] {
                 state = stateURL
+       } else if strings.HasPrefix(canonAttrName, "on") {
+               state = stateJS
         }
  
         // Look for the start of the value.
@@ -268,16 +354,17 @@ func tTag(c context, s []byte) (context, []byte) {
         i = eatWhiteSpace(s, i+1)
  
         // Find the attribute delimiter.
+       delim := delimSpaceOrTagEnd
         if i < len(s) {
                 switch s[i] {
                 case '\'':
-                       return context{state: state, delim: delimSingleQuote}, s[i+1:]
+                       delim, i = delimSingleQuote, i+1
                 case '"':
-                       return context{state: state, delim: delimDoubleQuote}, s[i+1:]
+                       delim, i = delimDoubleQuote, i+1
                 }
         }
  
-       return context{state: state, delim: delimSpaceOrTagEnd}, s[i:]
+       return context{state: state, delim: delim}, s[i:]
  }
  
  // tAttr is the context transition function for the attribute state.
@@ -295,6 +382,154 @@ func tURL(c context, s []byte) (context, []byte) {
         return c, nil
  }
  
+// tJS is the context transition function for the JS state.
+func tJS(c context, s []byte) (context, []byte) {
+       // TODO: delegate to tSpecialTagEnd to find any </script> once that CL
+       // has been merged.
+
+       i := bytes.IndexAny(s, `"'/`)
+       if i == -1 {
+               // The entire input contains no string, comment, or regexp tokens.
+               c.jsCtx = nextJSCtx(s, c.jsCtx)
+               return c, nil
+       }
+       c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
+       switch s[i] {
+       case '"':
+               c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
+       case '\'':
+               c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
+       case '/':
+               switch {
+               case i+1 < len(s) && s[i+1] == '/':
+                       c.state = stateJSLineCmt
+               case i+1 < len(s) && s[i+1] == '*':
+                       c.state = stateJSBlockCmt
+               case c.jsCtx == jsCtxRegexp:
+                       c.state = stateJSRegexp
+               default:
+                       c.jsCtx = jsCtxRegexp
+               }
+       default:
+               panic("unreachable")
+       }
+       return c, s[i+1:]
+}
+
+// tJSStr is the context transition function for the JS string states.
+func tJSStr(c context, s []byte) (context, []byte) {
+       // TODO: delegate to tSpecialTagEnd to find any </script> once that CL
+       // has been merged.
+
+       quoteAndEsc := `\"`
+       if c.state == stateJSSqStr {
+               quoteAndEsc = `\'`
+       }
+
+       b := s
+       for {
+               i := bytes.IndexAny(b, quoteAndEsc)
+               if i == -1 {
+                       return c, nil
+               }
+               if b[i] == '\\' {
+                       i++
+                       if i == len(b) {
+                               return context{
+                                       state:  stateError,
+                                       errStr: fmt.Sprintf("unfinished escape sequence in JS string: %q", s),
+                               }, nil
+                       }
+               } else {
+                       c.state, c.jsCtx = stateJS, jsCtxDivOp
+                       return c, b[i+1:]
+               }
+               b = b[i+1:]
+       }
+       panic("unreachable")
+}
+
+// tJSRegexp is the context transition function for the /RegExp/ literal state.
+func tJSRegexp(c context, s []byte) (context, []byte) {
+       // TODO: delegate to tSpecialTagEnd to find any </script> once that CL
+       // has been merged.
+
+       b := s
+       inCharset := false
+       for {
+               i := bytes.IndexAny(b, `/[\]`)
+               if i == -1 {
+                       break
+               }
+               switch b[i] {
+               case '/':
+                       if !inCharset {
+                               c.state, c.jsCtx = stateJS, jsCtxDivOp
+                               return c, b[i+1:]
+                       }
+               case '\\':
+                       i++
+                       if i == len(b) {
+                               return context{
+                                       state:  stateError,
+                                       errStr: fmt.Sprintf("unfinished escape sequence in JS regexp: %q", s),
+                               }, nil
+                       }
+               case '[':
+                       inCharset = true
+               case ']':
+                       inCharset = false
+               default:
+                       panic("unreachable")
+               }
+               b = b[i+1:]
+       }
+
+       if inCharset {
+               // This can be fixed by making context richer if interpolation
+               // into charsets is desired.
+               return context{
+                       state:  stateError,
+                       errStr: fmt.Sprintf("unfinished JS regexp charset: %q", s),
+               }, nil
+       }
+
+       return c, nil
+}
+
+var blockCommentEnd = []byte("*/")
+
+// tJSBlockCmt is the context transition function for the JS /*comment*/ state.
+func tJSBlockCmt(c context, s []byte) (context, []byte) {
+       // TODO: delegate to tSpecialTagEnd to find any </script> once that CL
+       // has been merged.
+
+       i := bytes.Index(s, blockCommentEnd)
+       if i == -1 {
+               return c, nil
+       }
+       c.state = stateJS
+       return c, s[i+2:]
+}
+
+// tJSLineCmt is the context transition function for the JS //comment state.
+func tJSLineCmt(c context, s []byte) (context, []byte) {
+       // TODO: delegate to tSpecialTagEnd to find any </script> once that CL
+       // has been merged.
+
+       i := bytes.IndexAny(s, "\r\n\u2028\u2029")
+       if i == -1 {
+               return c, nil
+       }
+       c.state = stateJS
+       // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
+       // "However, the LineTerminator at the end of the line is not
+       // considered to be part of the single-line comment; it is recognised
+       // separately by the lexical grammar and becomes part of the stream of
+       // input elements for the syntactic grammar."
+       return c, s[i:]
+}
+
  // tError is the context transition function for the error state.
  func tError(c context, s []byte) (context, []byte) {
         return c, nil
diff --git a/src/pkg/exp/template/html/escape_test.go b/src/pkg/exp/template/html/escape_test.go

index a911c7d8357b3791e0f6f1611f6bd9c9a2153f0f..6f5ecf6ef3e84959189bd7f0d55a7eaa05a87b78 100644 (file)
--- a/src/pkg/exp/template/html/escape_test.go
+++ b/src/pkg/exp/template/html/escape_test.go
@@ -8,6 +8,7 @@ import (
         "bytes"
         "strings"
         "template"
+       "template/parse"
         "testing"
  )
  
@@ -16,6 +17,8 @@ func TestEscape(t *testing.T) {
                 F, T    bool
                 C, G, H string
                 A, E    []string
+               N       int
+               Z       *int
         }{
                 F: false,
                 T: true,
@@ -24,9 +27,11 @@ func TestEscape(t *testing.T) {
                 H: "<Hello>",
                 A: []string{"<a>", "<b>"},
                 E: []string{},
+               N: 42,
+               Z: nil,
         }
  
-       var testCases = []struct {
+       tests := []struct {
                 name   string
                 input  string
                 output string
@@ -141,29 +146,71 @@ func TestEscape(t *testing.T) {
                         `<a href="{{if .T}}/foo?a={{else}}/bar#{{end}}{{.C}}">`,
                         `<a href="/foo?a=%3CCincinatti%3E">`,
                 },
+               {
+                       "jsStrValue",
+                       "<button onclick='alert({{.H}})'>",
+                       `<button onclick='alert(&#34;\u003cHello\u003e&#34;)'>`,
+               },
+               {
+                       "jsNumericValue",
+                       "<button onclick='alert({{.N}})'>",
+                       `<button onclick='alert( 42 )'>`,
+               },
+               {
+                       "jsBoolValue",
+                       "<button onclick='alert({{.T}})'>",
+                       `<button onclick='alert( true )'>`,
+               },
+               {
+                       "jsNilValue",
+                       "<button onclick='alert(typeof{{.Z}})'>",
+                       `<button onclick='alert(typeof null )'>`,
+               },
+               {
+                       "jsObjValue",
+                       "<button onclick='alert({{.A}})'>",
+                       `<button onclick='alert([&#34;\u003ca\u003e&#34;,&#34;\u003cb\u003e&#34;])'>`,
+               },
+               {
+                       "jsObjValueNotOverEscaped",
+                       "<button onclick='alert({{.A | html}})'>",
+                       `<button onclick='alert([&#34;\u003ca\u003e&#34;,&#34;\u003cb\u003e&#34;])'>`,
+               },
+               {
+                       "jsStr",
+                       "<button onclick='alert(&quot;{{.H}}&quot;)'>",
+                       `<button onclick='alert(&quot;\x3cHello\x3e&quot;)'>`,
+               },
+               {
+                       "jsStrNotUnderEscaped",
+                       "<button onclick='alert({{.C | urlquery}})'>",
+                       // URL escaped, then quoted for JS.
+                       `<button onclick='alert(&#34;%3CCincinatti%3E&#34;)'>`,
+               },
+               {
+                       "jsRe",
+                       "<button onclick='alert(&quot;{{.H}}&quot;)'>",
+                       `<button onclick='alert(&quot;\x3cHello\x3e&quot;)'>`,
+               },
         }
  
-       for _, tc := range testCases {
-               tmpl, err := template.New(tc.name).Parse(tc.input)
-               if err != nil {
-                       t.Errorf("%s: template parsing failed: %s", tc.name, err)
-                       continue
-               }
-               Escape(tmpl)
+       for _, test := range tests {
+               tmpl := template.Must(template.New(test.name).Parse(test.input))
+               tmpl, err := Escape(tmpl)
                 b := new(bytes.Buffer)
                 if err = tmpl.Execute(b, data); err != nil {
-                       t.Errorf("%s: template execution failed: %s", tc.name, err)
+                       t.Errorf("%s: template execution failed: %s", test.name, err)
                         continue
                 }
-               if w, g := tc.output, b.String(); w != g {
-                       t.Errorf("%s: escaped output: want %q got %q", tc.name, w, g)
+               if w, g := test.output, b.String(); w != g {
+                       t.Errorf("%s: escaped output: want\n\t%q\ngot\n\t%q", test.name, w, g)
                         continue
                 }
         }
  }
  
  func TestErrors(t *testing.T) {
-       var testCases = []struct {
+       tests := []struct {
                 input string
                 err   string
         }{
@@ -235,33 +282,53 @@ func TestErrors(t *testing.T) {
                         `<a href="{{if .F}}/foo?a={{else}}/bar/{{end}}{{.H}}">`,
                         "z:1: (action: [(command: [F=[H]])]) appears in an ambiguous URL context",
                 },
+               {
+                       `<a onclick="alert('Hello \`,
+                       `unfinished escape sequence in JS string: "Hello \\"`,
+               },
+               {
+                       `<a onclick='alert("Hello\, World\`,
+                       `unfinished escape sequence in JS string: "Hello\\, World\\"`,
+               },
+               {
+                       `<a onclick='alert(/x+\`,
+                       `unfinished escape sequence in JS regexp: "x+\\"`,
+               },
+               {
+                       `<a onclick="/foo[\]/`,
+                       `unfinished JS regexp charset: "foo[\\]/"`,
+               },
+               {
+                       `<a onclick="/* alert({{.X}} */">`,
+                       `z:1: (action: [(command: [F=[X]])]) appears inside a comment`,
+               },
+               {
+                       `<a onclick="// alert({{.X}}">`,
+                       `z:1: (action: [(command: [F=[X]])]) appears inside a comment`,
+               },
         }
  
-       for _, tc := range testCases {
-               tmpl, err := template.New("z").Parse(tc.input)
-               if err != nil {
-                       t.Errorf("input=%q: template parsing failed: %s", tc.input, err)
-                       continue
-               }
+       for _, test := range tests {
+               tmpl := template.Must(template.New("z").Parse(test.input))
                 var got string
                 if _, err := Escape(tmpl); err != nil {
                         got = err.String()
                 }
-               if tc.err == "" {
+               if test.err == "" {
                         if got != "" {
-                               t.Errorf("input=%q: unexpected error %q", tc.input, got)
+                               t.Errorf("input=%q: unexpected error %q", test.input, got)
                         }
                         continue
                 }
-               if strings.Index(got, tc.err) == -1 {
-                       t.Errorf("input=%q: error %q does not contain expected string %q", tc.input, got, tc.err)
+               if strings.Index(got, test.err) == -1 {
+                       t.Errorf("input=%q: error %q does not contain expected string %q", test.input, got, test.err)
                         continue
                 }
         }
  }
  
  func TestEscapeText(t *testing.T) {
-       var testCases = []struct {
+       tests := []struct {
                 input  string
                 output context
         }{
@@ -378,18 +445,173 @@ func TestEscapeText(t *testing.T) {
                         `<input checked type="checkbox"`,
                         context{state: stateTag},
                 },
+               {
+                       `<a onclick="`,
+                       context{state: stateJS, delim: delimDoubleQuote},
+               },
+               {
+                       `<a onclick="//foo`,
+                       context{state: stateJSLineCmt, delim: delimDoubleQuote},
+               },
+               {
+                       "<a onclick='//\n",
+                       context{state: stateJS, delim: delimSingleQuote},
+               },
+               {
+                       "<a onclick='//\r\n",
+                       context{state: stateJS, delim: delimSingleQuote},
+               },
+               {
+                       "<a onclick='//\u2028",
+                       context{state: stateJS, delim: delimSingleQuote},
+               },
+               {
+                       `<a onclick="/*`,
+                       context{state: stateJSBlockCmt, delim: delimDoubleQuote},
+               },
+               {
+                       `<a onkeypress="&quot;`,
+                       context{state: stateJSDqStr, delim: delimDoubleQuote},
+               },
+               {
+                       `<a onclick='&quot;foo&quot;`,
+                       context{state: stateJS, delim: delimSingleQuote, jsCtx: jsCtxDivOp},
+               },
+               {
+                       `<a onclick=&#39;foo&#39;`,
+                       context{state: stateJS, delim: delimSpaceOrTagEnd, jsCtx: jsCtxDivOp},
+               },
+               {
+                       `<a onclick=&#39;foo`,
+                       context{state: stateJSSqStr, delim: delimSpaceOrTagEnd},
+               },
+               {
+                       `<a onclick="&quot;foo'`,
+                       context{state: stateJSDqStr, delim: delimDoubleQuote},
+               },
+               {
+                       `<a onclick="'foo&quot;`,
+                       context{state: stateJSSqStr, delim: delimDoubleQuote},
+               },
+               {
+                       `<A ONCLICK="'`,
+                       context{state: stateJSSqStr, delim: delimDoubleQuote},
+               },
+               {
+                       `<a onclick="/`,
+                       context{state: stateJSRegexp, delim: delimDoubleQuote},
+               },
+               {
+                       `<a onclick="'foo'`,
+                       context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp},
+               },
+               {
+                       `<a onclick="'foo\'`,
+                       context{state: stateJSSqStr, delim: delimDoubleQuote},
+               },
+               {
+                       `<a onclick="'foo\'`,
+                       context{state: stateJSSqStr, delim: delimDoubleQuote},
+               },
+               {
+                       `<a onclick="/foo/`,
+                       context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp},
+               },
+               {
+                       `<a onclick="1 /foo`,
+                       context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp},
+               },
+               {
+                       `<a onclick="1 /*c*/ /foo`,
+                       context{state: stateJS, delim: delimDoubleQuote, jsCtx: jsCtxDivOp},
+               },
+               {
+                       `<a onclick="/foo[/]`,
+                       context{state: stateJSRegexp, delim: delimDoubleQuote},
+               },
+               {
+                       `<a onclick="/foo\/`,
+                       context{state: stateJSRegexp, delim: delimDoubleQuote},
+               },
         }
  
-       for _, tc := range testCases {
-               b := []byte(tc.input)
+       for _, test := range tests {
+               b := []byte(test.input)
                 c := escapeText(context{}, b)
-               if !tc.output.eq(c) {
-                       t.Errorf("input %q: want context %v got %v", tc.input, tc.output, c)
+               if !test.output.eq(c) {
+                       t.Errorf("input %q: want context\n\t%v\ngot\n\t%v", test.input, test.output, c)
                         continue
                 }
-               if tc.input != string(b) {
-                       t.Errorf("input %q: text node was modified: want %q got %q", tc.input, tc.input, b)
+               if test.input != string(b) {
+                       t.Errorf("input %q: text node was modified: want %q got %q", test.input, test.input, b)
                         continue
                 }
         }
  }
+
+func TestEnsurePipelineContains(t *testing.T) {
+       tests := []struct {
+               input, output string
+               ids           []string
+       }{
+               {
+                       "{{.X}}",
+                       "[(command: [F=[X]])]",
+                       []string{},
+               },
+               {
+                       "{{.X | html}}",
+                       "[(command: [F=[X]]) (command: [I=html])]",
+                       []string{},
+               },
+               {
+                       "{{.X}}",
+                       "[(command: [F=[X]]) (command: [I=html])]",
+                       []string{"html"},
+               },
+               {
+                       "{{.X | html}}",
+                       "[(command: [F=[X]]) (command: [I=html]) (command: [I=urlquery])]",
+                       []string{"urlquery"},
+               },
+               {
+                       "{{.X | html | urlquery}}",
+                       "[(command: [F=[X]]) (command: [I=html]) (command: [I=urlquery])]",
+                       []string{"urlquery"},
+               },
+               {
+                       "{{.X | html | urlquery}}",
+                       "[(command: [F=[X]]) (command: [I=html]) (command: [I=urlquery])]",
+                       []string{"html", "urlquery"},
+               },
+               {
+                       "{{.X | html | urlquery}}",
+                       "[(command: [F=[X]]) (command: [I=html]) (command: [I=urlquery])]",
+                       []string{"html"},
+               },
+               {
+                       "{{.X | urlquery}}",
+                       "[(command: [F=[X]]) (command: [I=html]) (command: [I=urlquery])]",
+                       []string{"html", "urlquery"},
+               },
+               {
+                       "{{.X | html | print}}",
+                       "[(command: [F=[X]]) (command: [I=urlquery]) (command: [I=html]) (command: [I=print])]",
+                       []string{"urlquery", "html"},
+               },
+       }
+       for _, test := range tests {
+               tmpl := template.Must(template.New("test").Parse(test.input))
+               action, ok := (tmpl.Tree.Root.Nodes[0].(*parse.ActionNode))
+               if !ok {
+                       t.Errorf("First node is not an action: %s", test.input)
+                       continue
+               }
+               pipe := action.Pipe
+               ensurePipelineContains(pipe, test.ids)
+               got := pipe.String()
+               if got != test.output {
+                       t.Errorf("%s, %v: want\n\t%s\ngot\n\t%s", test.input, test.ids, test.output, got)
+               }
+       }
+}
diff --git a/src/pkg/exp/template/html/js.go b/src/pkg/exp/template/html/js.go

new file mode 100644 (file)

index 0000000..d29e057
--- /dev/null
+++ b/src/pkg/exp/template/html/js.go
@@ -0,0 +1,344 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "bytes"
+       "fmt"
+       "json"
+       "strings"
+       "utf8"
+)
+
+// nextJSCtx returns the context that determines whether a slash after the
+// given run of tokens starts a regular expression instead of a division
+// operator: / or /=.
+//
+// This assumes that the token run does not include any string tokens, comment
+// tokens, regular expression literal tokens, or division operators.
+//
+// This fails on some valid but nonsensical JavaScript programs like
+// "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
+// fail on any known useful programs. It is based on the draft
+// JavaScript 2.0 lexical grammar and requires one token of lookbehind:
+// http://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
+func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
+       s = bytes.TrimRight(s, "\t\n\f\r \u2028\u2029")
+       if len(s) == 0 {
+               return preceding
+       }
+
+       // All cases below are in the single-byte UTF-8 group.
+       switch c, n := s[len(s)-1], len(s); c {
+       case '+', '-':
+               // ++ and -- are not regexp preceders, but + and - are whether
+               // they are used as infix or prefix operators.
+               start := n - 1
+               // Count the number of adjacent dashes or pluses.
+               for start > 0 && s[start-1] == c {
+                       start--
+               }
+               if (n-start)&1 == 1 {
+                       // Reached for trailing minus signs since "---" is the
+                       // same as "-- -".
+                       return jsCtxRegexp
+               }
+               return jsCtxDivOp
+       case '.':
+               // Handle "42."
+               if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
+                       return jsCtxDivOp
+               }
+               return jsCtxRegexp
+       // Suffixes for all punctuators from section 7.7 of the language spec
+       // that only end binary operators not handled above.
+       case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
+               return jsCtxRegexp
+       // Suffixes for all punctuators from section 7.7 of the language spec
+       // that are prefix operators not handled above.
+       case '!', '~':
+               return jsCtxRegexp
+       // Matches all the punctuators from section 7.7 of the language spec
+       // that are open brackets not handled above.
+       case '(', '[':
+               return jsCtxRegexp
+       // Matches all the punctuators from section 7.7 of the language spec
+       // that precede expression starts.
+       case ':', ';', '{':
+               return jsCtxRegexp
+       // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
+       // are handled in the default except for '}' which can precede a
+       // division op as in
+       //    ({ valueOf: function () { return 42 } } / 2)
+       // which is valid, but, in practice, developers don't divide object
+       // literals, so our heuristic works well for code like
+       //    function () { ... }  /foo/.test(x) && sideEffect();
+       // The ')' punctuator can precede a regular expression as in
+       //     if (b) /foo/.test(x) && ...
+       // but this is much less likely than
+       //     (a + b) / c
+       case '}':
+               return jsCtxRegexp
+       default:
+               // Look for an IdentifierName and see if it is a keyword that
+               // can precede a regular expression.
+               j := n
+               for j > 0 && isJSIdentPart(int(s[j-1])) {
+                       j--
+               }
+               if regexpPrecederKeywords[string(s[j:])] {
+                       return jsCtxRegexp
+               }
+       }
+       // Otherwise is a punctuator not listed above, or
+       // a string which precedes a div op, or an identifier
+       // which precedes a div op.
+       return jsCtxDivOp
+}
+
+// regexpPrecederKeywords is a set of reserved JS keywords that can precede a
+// regular expression in JS source.
+var regexpPrecederKeywords = map[string]bool{
+       "break":      true,
+       "case":       true,
+       "continue":   true,
+       "delete":     true,
+       "do":         true,
+       "else":       true,
+       "finally":    true,
+       "in":         true,
+       "instanceof": true,
+       "return":     true,
+       "throw":      true,
+       "try":        true,
+       "typeof":     true,
+       "void":       true,
+}
+
+// jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
+// neither side-effects nor free variables outside (NaN, Infinity).
+func jsValEscaper(args ...interface{}) string {
+       var a interface{}
+       if len(args) == 1 {
+               a = args[0]
+       } else {
+               a = fmt.Sprint(args...)
+       }
+       // TODO: detect cycles before calling Marshal which loops infinitely on
+       // cyclic data. This may be an unacceptable DoS risk.
+
+       // TODO: make sure that json.Marshal escapes codepoints U+2028 & U+2029
+       // so it falls within the subset of JSON which is valid JS and maybe
+       // post-process to prevent it from containing
+       // "<!--", "-->", "<![CDATA[", "]]>", or "</script"
+       // in case custom marshallers produce output containing those.
+
+       // TODO: Maybe abbreviate \u00ab to \xab to produce more compact output.
+
+       // TODO: JSON allows arbitrary unicode codepoints, but EcmaScript
+       // defines a SourceCharacter as either a UTF-16 or UCS-2 code-unit.
+       // Determine whether supplemental codepoints in UTF-8 encoded JS inside
+       // string literals are properly interpreted by major interpreters.
+
+       b, err := json.Marshal(a)
+       if err != nil {
+               // Put a space before comment so that if it is flush against
+               // a division operator it is not turned into a line comment:
+               //     x/{{y}}
+               // turning into
+               //     x//* error marshalling y:
+               //          second line of error message */null
+               return fmt.Sprintf(" /* %s */null ", strings.Replace(err.String(), "*/", "* /", -1))
+       }
+       if len(b) != 0 {
+               first, _ := utf8.DecodeRune(b)
+               last, _ := utf8.DecodeLastRune(b)
+               if isJSIdentPart(first) || isJSIdentPart(last) {
+                       return " " + string(b) + " "
+               }
+       }
+       return string(b)
+}
+
+// jsStrEscaper produces a string that can be included between quotes in
+// JavaScript source, in JavaScript embedded in an HTML5 <script> element,
+// or in an HTML5 event handler attribute such as onclick.
+func jsStrEscaper(args ...interface{}) string {
+       ok := false
+       var s string
+       if len(args) == 1 {
+               s, ok = args[0].(string)
+       }
+       if !ok {
+               s = fmt.Sprint(args...)
+       }
+       var b bytes.Buffer
+       written := 0
+       for i, r := range s {
+               var repl string
+               switch r {
+               // Every rune that needs escaping must have a case here; there
+               // is no IndexAny fast path in this function to keep in sync.
+               case 0:
+                       repl = `\0`
+               case '\t':
+                       repl = `\t`
+               case '\n':
+                       repl = `\n`
+               case '\v':
+                       // "\v" == "v" on IE 6.
+                       repl = `\x0b`
+               case '\f':
+                       repl = `\f`
+               case '\r':
+                       repl = `\r`
+               // Encode HTML specials as hex so the output can be embedded
+               // in HTML attributes without further encoding.
+               case '"':
+                       repl = `\x22`
+               case '&':
+                       repl = `\x26`
+               case '\'':
+                       repl = `\x27`
+               case '+':
+                       repl = `\x2b`
+               case '/':
+                       repl = `\/`
+               case '<':
+                       repl = `\x3c`
+               case '>':
+                       repl = `\x3e`
+               case '\\':
+                       repl = `\\`
+               case '\u2028':
+                       repl = `\u2028`
+               case '\u2029':
+                       repl = `\u2029`
+               default:
+                       continue
+               }
+               b.WriteString(s[written:i])
+               b.WriteString(repl)
+               written = i + utf8.RuneLen(r)
+       }
+       if b.Len() == 0 {
+               return s
+       }
+       b.WriteString(s[written:])
+       return b.String()
+}
+
+// jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
+// specials so the result is treated literally when included in a regular
+// expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
+// the literal text of {{.X}} followed by the string "bar".
+func jsRegexpEscaper(args ...interface{}) string {
+       ok := false
+       var s string
+       if len(args) == 1 {
+               s, ok = args[0].(string)
+       }
+       if !ok {
+               s = fmt.Sprint(args...)
+       }
+       var b bytes.Buffer
+       written := 0
+       for i, r := range s {
+               var repl string
+               switch r {
+               // Every rune that needs escaping must have a case here; there
+               // is no IndexAny fast path in this function to keep in sync.
+               case 0:
+                       repl = `\0`
+               case '\t':
+                       repl = `\t`
+               case '\n':
+                       repl = `\n`
+               case '\v':
+                       // "\v" == "v" on IE 6.
+                       repl = `\x0b`
+               case '\f':
+                       repl = `\f`
+               case '\r':
+                       repl = `\r`
+               // Encode HTML specials as hex so the output can be embedded
+               // in HTML attributes without further encoding.
+               case '"':
+                       repl = `\x22`
+               case '$':
+                       repl = `\$`
+               case '&':
+                       repl = `\x26`
+               case '\'':
+                       repl = `\x27`
+               case '(':
+                       repl = `\(`
+               case ')':
+                       repl = `\)`
+               case '*':
+                       repl = `\*`
+               case '+':
+                       repl = `\x2b`
+               case '-':
+                       repl = `\-`
+               case '.':
+                       repl = `\.`
+               case '/':
+                       repl = `\/`
+               case '<':
+                       repl = `\x3c`
+               case '>':
+                       repl = `\x3e`
+               case '?':
+                       repl = `\?`
+               case '[':
+                       repl = `\[`
+               case '\\':
+                       repl = `\\`
+               case ']':
+                       repl = `\]`
+               case '^':
+                       repl = `\^`
+               case '{':
+                       repl = `\{`
+               case '|':
+                       repl = `\|`
+               case '}':
+                       repl = `\}`
+               case '\u2028':
+                       repl = `\u2028`
+               case '\u2029':
+                       repl = `\u2029`
+               default:
+                       continue
+               }
+               b.WriteString(s[written:i])
+               b.WriteString(repl)
+               written = i + utf8.RuneLen(r)
+       }
+       if b.Len() == 0 {
+               return s
+       }
+       b.WriteString(s[written:])
+       return b.String()
+}
+
+// isJSIdentPart is true if the given rune is a JS identifier part.
+// It does not handle all the non-Latin letters, joiners, and combining marks,
+// but it does handle every codepoint that can occur in a numeric literal or
+// a keyword.
+func isJSIdentPart(rune int) bool {
+       switch {
+       case '$' == rune:
+               return true
+       case '0' <= rune && rune <= '9':
+               return true
+       case 'A' <= rune && rune <= 'Z':
+               return true
+       case '_' == rune:
+               return true
+       case 'a' <= rune && rune <= 'z':
+               return true
+       }
+       return false
+}
diff --git a/src/pkg/exp/template/html/js_test.go b/src/pkg/exp/template/html/js_test.go

new file mode 100644 (file)

index 0000000..0a51a21
--- /dev/null
+++ b/src/pkg/exp/template/html/js_test.go
@@ -0,0 +1,352 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "bytes"
+       "math"
+       "strings"
+       "testing"
+)
+
+func TestNextJsCtx(t *testing.T) {
+       tests := []struct {
+               jsCtx jsCtx
+               s     string
+       }{
+               // Statement terminators precede regexps.
+               {jsCtxRegexp, ";"},
+               // This is not airtight.
+               //     ({ valueOf: function () { return 1 } } / 2)
+               // is valid JavaScript but in practice, devs do not do this.
+               // A block followed by a statement starting with a RegExp is
+               // much more common:
+               //     while (x) {...} /foo/.test(x) || panic()
+               {jsCtxRegexp, "}"},
+               // But member, call, grouping, and array expression terminators
+               // precede div ops.
+               {jsCtxDivOp, ")"},
+               {jsCtxDivOp, "]"},
+               // At the start of a primary expression, array, or expression
+               // statement, expect a regexp.
+               {jsCtxRegexp, "("},
+               {jsCtxRegexp, "["},
+               {jsCtxRegexp, "{"},
+               // Assignment operators precede regexps as do all exclusively
+               // prefix and binary operators.
+               {jsCtxRegexp, "="},
+               {jsCtxRegexp, "+="},
+               {jsCtxRegexp, "*="},
+               {jsCtxRegexp, "*"},
+               {jsCtxRegexp, "!"},
+               // Whether the + or - is infix or prefix, it cannot precede a
+               // div op.
+               {jsCtxRegexp, "+"},
+               {jsCtxRegexp, "-"},
+               // An incr/decr op precedes a div operator.
+               // This is not airtight.  In (g = ++/h/i) a regexp follows a
+               // pre-increment operator, but in practice devs do not try to
+               // increment or decrement regular expressions.
+               // (g++/h/i) where ++ is a postfix operator on g is much more
+               // common.
+               {jsCtxDivOp, "--"},
+               {jsCtxDivOp, "++"},
+               {jsCtxDivOp, "x--"},
+               // When we have many dashes or pluses, then they are grouped
+               // left to right.
+               {jsCtxRegexp, "x---"}, // A postfix -- then a -.
+               // return followed by a slash returns the regexp literal or the
+               // slash starts a regexp literal in an expression statement that
+               // is dead code.
+               {jsCtxRegexp, "return"},
+               {jsCtxRegexp, "return "},
+               {jsCtxRegexp, "return\t"},
+               {jsCtxRegexp, "return\n"},
+               {jsCtxRegexp, "return\u2028"},
+               // Identifiers can be divided and cannot validly be followed by
+               // a regular expression.  Semicolon insertion cannot happen
+               // between an identifier and a regular expression on a new line
+               // because the one token lookahead for semicolon insertion has
+               // to conclude that it could be a div binary op and treat it as
+               // such.
+               {jsCtxDivOp, "x"},
+               {jsCtxDivOp, "x "},
+               {jsCtxDivOp, "x\t"},
+               {jsCtxDivOp, "x\n"},
+               {jsCtxDivOp, "x\u2028"},
+               {jsCtxDivOp, "preturn"},
+               // Numbers precede div ops.
+               {jsCtxDivOp, "0"},
+               // Dots that are part of a number are div preceders.
+               {jsCtxDivOp, "0."},
+       }
+
+       for _, test := range tests {
+               if nextJSCtx([]byte(test.s), jsCtxRegexp) != test.jsCtx {
+                       t.Errorf("want %s got %q", test.jsCtx, test.s)
+               }
+               if nextJSCtx([]byte(test.s), jsCtxDivOp) != test.jsCtx {
+                       t.Errorf("want %s got %q", test.jsCtx, test.s)
+               }
+       }
+
+       if nextJSCtx([]byte("   "), jsCtxRegexp) != jsCtxRegexp {
+               t.Error("Blank tokens")
+       }
+
+       if nextJSCtx([]byte("   "), jsCtxDivOp) != jsCtxDivOp {
+               t.Error("Blank tokens")
+       }
+}
+
+func TestJSValEscaper(t *testing.T) {
+       tests := []struct {
+               x  interface{}
+               js string
+       }{
+               {int(42), " 42 "},
+               {uint(42), " 42 "},
+               {int16(42), " 42 "},
+               {uint16(42), " 42 "},
+               {int32(-42), " -42 "},
+               {uint32(42), " 42 "},
+               {int16(-42), " -42 "},
+               {uint16(42), " 42 "},
+               {int64(-42), " -42 "},
+               {uint64(42), " 42 "},
+               {uint64(1) << 53, " 9007199254740992 "},
+               // ulp(1 << 53) > 1 so this loses precision in JS
+               // but it is still a representable integer literal.
+               {uint64(1)<<53 + 1, " 9007199254740993 "},
+               {float32(1.0), " 1 "},
+               {float32(-1.0), " -1 "},
+               {float32(0.5), " 0.5 "},
+               {float32(-0.5), " -0.5 "},
+               {float32(1.0) / float32(256), " 0.00390625 "},
+               {float32(0), " 0 "},
+               {math.Copysign(0, -1), " -0 "},
+               {float64(1.0), " 1 "},
+               {float64(-1.0), " -1 "},
+               {float64(0.5), " 0.5 "},
+               {float64(-0.5), " -0.5 "},
+               {float64(0), " 0 "},
+               {math.Copysign(0, -1), " -0 "},
+               {"", `""`},
+               {"foo", `"foo"`},
+               // Newlines.
+               // {"\r\n\u2028\u2029", `"\r\n\u2028\u2029"`}, // TODO: FAILING.  Maybe fix in json package.
+               // "\v" == "v" on IE 6 so use "\x0b" instead.
+               {"\t\x0b", `"\u0009\u000b"`},
+               {struct{ X, Y int }{1, 2}, `{"X":1,"Y":2}`},
+               {[]interface{}{}, "[]"},
+               {[]interface{}{42, "foo", nil}, `[42,"foo",null]`},
+               {"<!--", `"\u003c!--"`},
+               {"-->", `"--\u003e"`},
+               {"<![CDATA[", `"\u003c![CDATA["`},
+               {"]]>", `"]]\u003e"`},
+               {"</script", `"\u003c/script"`},
+               {"\U0001D11E", "\"\U0001D11E\""}, // or "\uD834\uDD1E"
+       }
+
+       for _, test := range tests {
+               if js := jsValEscaper(test.x); js != test.js {
+                       t.Errorf("%+v: want\n\t%q\ngot\n\t%q", test.x, test.js, js)
+               }
+               // Make sure that escaping corner cases are not broken
+               // by nesting.
+               a := []interface{}{test.x}
+               want := "[" + strings.TrimSpace(test.js) + "]"
+               if js := jsValEscaper(a); js != want {
+                       t.Errorf("%+v: want\n\t%q\ngot\n\t%q", a, want, js)
+               }
+       }
+}
+
+func TestJSStrEscaper(t *testing.T) {
+       tests := []struct {
+               x   interface{}
+               esc string
+       }{
+               {"", ``},
+               {"foo", `foo`},
+               {"\u0000", `\0`},
+               {"\t", `\t`},
+               {"\n", `\n`},
+               {"\r", `\r`},
+               {"\u2028", `\u2028`},
+               {"\u2029", `\u2029`},
+               {"\\", `\\`},
+               {"\\n", `\\n`},
+               {"foo\r\nbar", `foo\r\nbar`},
+               // Preserve attribute boundaries.
+               {`"`, `\x22`},
+               {`'`, `\x27`},
+               // Allow embedding in HTML without further escaping.
+               {`&amp;`, `\x26amp;`},
+               // Prevent breaking out of text node and element boundaries.
+               {"</script>", `\x3c\/script\x3e`},
+               {"<![CDATA[", `\x3c![CDATA[`},
+               {"]]>", `]]\x3e`},
+               // http://dev.w3.org/html5/markup/aria/syntax.html#escaping-text-span
+               //   "The text in style, script, title, and textarea elements
+               //   must not have an escaping text span start that is not
+               //   followed by an escaping text span end."
+               // Furthermore, spoofing an escaping text span end could lead
+               // to different interpretation of a </script> sequence otherwise
+               // masked by the escaping text span, and spoofing a start could
+               // allow regular text content to be interpreted as script
+               // allowing script execution via a combination of a JS string
+               // injection followed by an HTML text injection.
+               {"<!--", `\x3c!--`},
+               {"-->", `--\x3e`},
+               // From http://code.google.com/p/doctype/wiki/ArticleUtf7
+               {"+ADw-script+AD4-alert(1)+ADw-/script+AD4-",
+                       `\x2bADw-script\x2bAD4-alert(1)\x2bADw-\/script\x2bAD4-`,
+               },
+       }
+
+       for _, test := range tests {
+               esc := jsStrEscaper(test.x)
+               if esc != test.esc {
+                       t.Errorf("%q: want %q got %q", test.x, test.esc, esc)
+               }
+       }
+}
+
+func TestJSRegexpEscaper(t *testing.T) {
+       tests := []struct {
+               x   interface{}
+               esc string
+       }{
+               {"", ``},
+               {"foo", `foo`},
+               {"\u0000", `\0`},
+               {"\t", `\t`},
+               {"\n", `\n`},
+               {"\r", `\r`},
+               {"\u2028", `\u2028`},
+               {"\u2029", `\u2029`},
+               {"\\", `\\`},
+               {"\\n", `\\n`},
+               {"foo\r\nbar", `foo\r\nbar`},
+               // Preserve attribute boundaries.
+               {`"`, `\x22`},
+               {`'`, `\x27`},
+               // Allow embedding in HTML without further escaping.
+               {`&amp;`, `\x26amp;`},
+               // Prevent breaking out of text node and element boundaries.
+               {"</script>", `\x3c\/script\x3e`},
+               {"<![CDATA[", `\x3c!\[CDATA\[`},
+               {"]]>", `\]\]\x3e`},
+               // Escaping text spans.
+               {"<!--", `\x3c!\-\-`},
+               {"-->", `\-\-\x3e`},
+               {"*", `\*`},
+               {"+", `\x2b`},
+               {"?", `\?`},
+               {"[](){}", `\[\]\(\)\{\}`},
+               {"$foo|x.y", `\$foo\|x\.y`},
+               {"x^y", `x\^y`},
+       }
+
+       for _, test := range tests {
+               esc := jsRegexpEscaper(test.x)
+               if esc != test.esc {
+                       t.Errorf("%q: want %q got %q", test.x, test.esc, esc)
+               }
+       }
+}
+
+func TestEscapersOnLower7AndSelectHighCodepoints(t *testing.T) {
+       input := ("\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f" +
+               "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+               ` !"#$%&'()*+,-./` +
+               `0123456789:;<=>?` +
+               `@ABCDEFGHIJKLMNO` +
+               `PQRSTUVWXYZ[\]^_` +
+               "`abcdefghijklmno" +
+               "pqrstuvwxyz{|}~\x7f" +
+               "\u00A0\u0100\u2028\u2029\ufeff\U0001D11E")
+
+       tests := []struct {
+               name    string
+               escaper func(...interface{}) string
+               escaped string
+       }{
+               {
+                       "jsStrEscaper",
+                       jsStrEscaper,
+                       "\\0\x01\x02\x03\x04\x05\x06\x07" +
+                               "\x08\\t\\n\\x0b\\f\\r\x0E\x0F" +
+                               "\x10\x11\x12\x13\x14\x15\x16\x17" +
+                               "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+                               ` !\x22#$%\x26\x27()*\x2b,-.\/` +
+                               `0123456789:;\x3c=\x3e?` +
+                               `@ABCDEFGHIJKLMNO` +
+                               `PQRSTUVWXYZ[\\]^_` +
+                               "`abcdefghijklmno" +
+                               "pqrstuvwxyz{|}~\x7f" +
+                               "\u00A0\u0100\\u2028\\u2029\ufeff\U0001D11E",
+               },
+               {
+                       "jsRegexpEscaper",
+                       jsRegexpEscaper,
+                       "\\0\x01\x02\x03\x04\x05\x06\x07" +
+                               "\x08\\t\\n\\x0b\\f\\r\x0E\x0F" +
+                               "\x10\x11\x12\x13\x14\x15\x16\x17" +
+                               "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
+                               ` !\x22#\$%\x26\x27\(\)\*\x2b,\-\.\/` +
+                               `0123456789:;\x3c=\x3e\?` +
+                               `@ABCDEFGHIJKLMNO` +
+                               `PQRSTUVWXYZ\[\\\]\^_` +
+                               "`abcdefghijklmno" +
+                               `pqrstuvwxyz\{\|\}~` + "\u007f" +
+                               "\u00A0\u0100\\u2028\\u2029\ufeff\U0001D11E",
+               },
+       }
+
+       for _, test := range tests {
+               if s := test.escaper(input); s != test.escaped {
+                       t.Errorf("%s once: want\n\t%q\ngot\n\t%q", test.name, test.escaped, s)
+                       continue
+               }
+
+               // Escape it rune by rune to make sure that any
+               // fast-path checking does not break escaping.
+               var buf bytes.Buffer
+               for _, c := range input {
+                       buf.WriteString(test.escaper(string(c)))
+               }
+
+               if s := buf.String(); s != test.escaped {
+                       t.Errorf("%s rune-wise: want\n\t%q\ngot\n\t%q", test.name, test.escaped, s)
+                       continue
+               }
+       }
+}
+
+func BenchmarkJSStrEscaperNoSpecials(b *testing.B) {
+       for i := 0; i < b.N; i++ {
+               jsStrEscaper("The quick, brown fox jumps over the lazy dog.")
+       }
+}
+
+func BenchmarkJSStrEscaper(b *testing.B) {
+       for i := 0; i < b.N; i++ {
+               jsStrEscaper("The <i>quick</i>,\r\n<span style='color:brown'>brown</span> fox jumps\u2028over the <canine class=\"lazy\">dog</canine>")
+       }
+}
+
+func BenchmarkJSRegexpEscaperNoSpecials(b *testing.B) {
+       for i := 0; i < b.N; i++ {
+               jsRegexpEscaper("The quick, brown fox jumps over the lazy dog")
+       }
+}
+
+func BenchmarkJSRegexpEscaper(b *testing.B) {
+       for i := 0; i < b.N; i++ {
+               jsRegexpEscaper("The <i>quick</i>,\r\n<span style='color:brown'>brown</span> fox jumps\u2028over the <canine class=\"lazy\">dog</canine>")
+       }
+}
author	Mike Samuel <mikesamuel@gmail.com>
	Thu, 1 Sep 2011 02:03:40 +0000 (12:03 +1000)
committer	Nigel Tao <nigeltao@golang.org>
	Thu, 1 Sep 2011 02:03:40 +0000 (12:03 +1000)
src/pkg/exp/template/html/Makefile		patch \| blob \| history
src/pkg/exp/template/html/context.go		patch \| blob \| history
src/pkg/exp/template/html/escape.go		patch \| blob \| history
src/pkg/exp/template/html/escape_test.go		patch \| blob \| history
src/pkg/exp/template/html/js.go	[new file with mode: 0644]	patch \| blob
src/pkg/exp/template/html/js_test.go	[new file with mode: 0644]	patch \| blob