From: Nigel Tao Date: Tue, 23 Aug 2011 03:22:26 +0000 (+1000) Subject: exp/template/html: differentiate URL-valued attributes (such as href) X-Git-Tag: weekly.2011-09-01~126 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=9969803f6ccddc503d7595237cde81d3f3c55466;p=gostls13.git exp/template/html: differentiate URL-valued attributes (such as href) from others (such as title) during escaping. R=r, mikesamuel, dsymonds CC=golang-dev https://golang.org/cl/4919042 --- diff --git a/src/pkg/exp/template/html/Makefile b/src/pkg/exp/template/html/Makefile index 2f107da111..6d8ff5cd14 100644 --- a/src/pkg/exp/template/html/Makefile +++ b/src/pkg/exp/template/html/Makefile @@ -6,6 +6,7 @@ include ../../../../Make.inc TARG=exp/template/html GOFILES=\ - escape.go + context.go\ + escape.go\ include ../../../../Make.pkg diff --git a/src/pkg/exp/template/html/context.go b/src/pkg/exp/template/html/context.go index 4110068834..5ef3b78146 100644 --- a/src/pkg/exp/template/html/context.go +++ b/src/pkg/exp/template/html/context.go @@ -16,56 +16,57 @@ import ( // http://www.w3.org/TR/html5/the-end.html#parsing-html-fragments // where the context element is null. type context struct { - state state - delim delim + state state + delim delim + errLine int + errStr string } -func (c context) String() string { - return fmt.Sprintf("context{state: %s, delim: %s", c.state, c.delim) -} - -// eq is true if the two contexts are identical field-wise. +// eq returns whether two contexts are equal. func (c context) eq(d context) bool { - return c.state == d.state && c.delim == d.delim + return c.state == d.state && c.delim == d.delim && c.errLine == d.errLine && c.errStr == d.errStr } // state describes a high-level HTML parser state. // -// It bounds the top of the element stack, and by extension the HTML -// insertion mode, but also contains state that does not correspond to -// anything in the HTML5 parsing algorithm because a single token -// production in the HTML grammar may contain embedded actions in a template. -// For instance, the quoted HTML attribute produced by +// It bounds the top of the element stack, and by extension the HTML insertion +// mode, but also contains state that does not correspond to anything in the +// HTML5 parsing algorithm because a single token production in the HTML +// grammar may contain embedded actions in a template. For instance, the quoted +// HTML attribute produced by //
// is a single token in HTML's grammar but in a template spans several nodes. type state uint8 const ( - // statePCDATA is parsed character data. An HTML parser is in + // stateText is parsed character data. An HTML parser is in // this state when its parse position is outside an HTML tag, // directive, comment, and special element body. - statePCDATA state = iota + stateText state = iota // stateTag occurs before an HTML attribute or the end of a tag. stateTag - // stateURI occurs inside an HTML attribute whose content is a URI. - stateURI + // stateAttr occurs inside an HTML attribute whose content is text. + stateAttr + // stateURL occurs inside an HTML attribute whose content is a URL. + stateURL // stateError is an infectious error state outside any valid // HTML/CSS/JS construct. stateError ) var stateNames = [...]string{ - statePCDATA: "statePCDATA", - stateTag: "stateTag", - stateURI: "stateURI", - stateError: "stateError", + stateText: "stateText", + stateTag: "stateTag", + stateAttr: "stateAttr", + stateURL: "stateURL", + stateError: "stateError", } func (s state) String() string { - if uint(s) < uint(len(stateNames)) { + if int(s) < len(stateNames) { return stateNames[s] } - return fmt.Sprintf("illegal state %d", uint(s)) + return fmt.Sprintf("illegal state %d", s) } // delim is the delimiter that will end the current HTML attribute. @@ -91,8 +92,8 @@ var delimNames = [...]string{ } func (d delim) String() string { - if uint(d) < uint(len(delimNames)) { + if int(d) < len(delimNames) { return delimNames[d] } - return fmt.Sprintf("illegal delim %d", uint(d)) + return fmt.Sprintf("illegal delim %d", d) } diff --git a/src/pkg/exp/template/html/escape.go b/src/pkg/exp/template/html/escape.go index e0e87b98d0..a0fccf96d1 100644 --- a/src/pkg/exp/template/html/escape.go +++ b/src/pkg/exp/template/html/escape.go @@ -2,104 +2,283 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// Package html is a specialization of exp/template that automates the +// Package html is a specialization of template that automates the // construction of safe HTML output. -// At the moment, the escaping is naive. All dynamic content is assumed to be -// plain text interpolated in an HTML PCDATA context. +// INCOMPLETE. package html import ( + "bytes" + "fmt" + "os" + "strings" "template" "template/parse" ) -// Escape rewrites each action in the template to guarantee the output is +// Escape rewrites each action in the template to guarantee that the output is // HTML-escaped. -func Escape(t *template.Template) { - // If the parser shares trees based on common-subexpression - // joining then we will need to avoid multiply escaping the same action. - escapeListNode(t.Tree.Root) +func Escape(t *template.Template) (*template.Template, os.Error) { + c := escapeList(context{}, t.Tree.Root) + if c.errStr != "" { + return nil, fmt.Errorf("%s:%d: %s", t.Name(), c.errLine, c.errStr) + } + return t, nil } -// escapeNode dispatches to escape helpers by type. -func escapeNode(node parse.Node) { - switch n := node.(type) { - case *parse.ListNode: - escapeListNode(n) - case *parse.TextNode: - // Nothing to do. +// escape escapes a template node. +func escape(c context, n parse.Node) context { + switch n := n.(type) { case *parse.ActionNode: - escapeActionNode(n) + return escapeAction(c, n) case *parse.IfNode: - escapeIfNode(n) + return escapeBranch(c, &n.BranchNode, "if") + case *parse.ListNode: + return escapeList(c, n) case *parse.RangeNode: - escapeRangeNode(n) - case *parse.TemplateNode: - // Nothing to do. + return escapeBranch(c, &n.BranchNode, "range") + case *parse.TextNode: + return escapeText(c, n) case *parse.WithNode: - escapeWithNode(n) - default: - panic("handling for " + node.String() + " not implemented") - // TODO: Handle other inner node types. + return escapeBranch(c, &n.BranchNode, "with") } + // TODO: handle a *parse.TemplateNode. Should Escape take a *template.Set? + panic("escaping " + n.String() + " is unimplemented") } -// escapeListNode recursively escapes its input's children. -func escapeListNode(node *parse.ListNode) { - if node == nil { - return +// escapeAction escapes an action template node. +func escapeAction(c context, n *parse.ActionNode) context { + sanitizer := "html" + if c.state == stateURL { + sanitizer = "urlquery" } - children := node.Nodes - for _, child := range children { - escapeNode(child) + // If the pipe already ends with the sanitizer, do not interfere. + if m := len(n.Pipe.Cmds); m != 0 { + if last := n.Pipe.Cmds[m-1]; len(last.Args) != 0 { + if i, ok := last.Args[0].(*parse.IdentifierNode); ok && i.Ident == sanitizer { + return c + } + } } + // Otherwise, append the sanitizer. + n.Pipe.Cmds = append(n.Pipe.Cmds, &parse.CommandNode{ + NodeType: parse.NodeCommand, + Args: []parse.Node{parse.NewIdentifier(sanitizer)}, + }) + return c } -// escapeActionNode adds a pipeline call to the end that escapes the result -// of the expression before it is interpolated into the template output. -func escapeActionNode(node *parse.ActionNode) { - pipe := node.Pipe +// join joins the two contexts of a branch template node. The result is an +// error context if either of the input contexts are error contexts, or if the +// the input contexts differ. +func join(a, b context, line int, nodeName string) context { + if a.state == stateError { + return a + } + if b.state == stateError { + return b + } + if a.eq(b) { + return a + } + return context{ + state: stateError, + errLine: line, + errStr: fmt.Sprintf("{{%s}} branches end in different contexts: %v, %v", nodeName, a, b), + } +} - cmds := pipe.Cmds - nCmds := len(cmds) +// escapeBranch escapes a branch template node: "if", "range" and "with". +func escapeBranch(c context, n *parse.BranchNode, nodeName string) context { + c0 := escapeList(c, n.List) + if nodeName == "range" { + // The "true" branch of a "range" node can execute multiple times. + // We check that executing n.List once results in the same context + // as executing n.List twice. + c0 = join(c0, escapeList(c0, n.List), n.Line, nodeName) + } + c1 := escapeList(c, n.ElseList) + return join(c0, c1, n.Line, nodeName) +} + +// escapeList escapes a list template node. +func escapeList(c context, n *parse.ListNode) context { + if n == nil { + return c + } + for _, m := range n.Nodes { + c = escape(c, m) + } + return c +} - // If it already has an escaping command, do not interfere. - if nCmds != 0 { - if lastCmd := cmds[nCmds-1]; len(lastCmd.Args) != 0 { - // TODO: Recognize url and js as escaping functions once - // we have enough context to know whether additional - // escaping is necessary. - if arg, ok := lastCmd.Args[0].(*parse.IdentifierNode); ok && arg.Ident == "html" { - return +// escapeText escapes a text template node. +func escapeText(c context, n *parse.TextNode) context { + for s := n.Text; len(s) > 0; { + c, s = transitionFunc[c.state](c, s) + } + return c +} + +// transitionFunc is the array of context transition functions for text nodes. +// A transition function takes a context and template text input, and returns +// the updated context and any unconsumed text. +var transitionFunc = [...]func(context, []byte) (context, []byte){ + stateText: tText, + stateTag: tTag, + stateURL: tURL, + stateAttr: tAttr, + stateError: tError, +} + +// tText is the context transition function for the text state. +func tText(c context, s []byte) (context, []byte) { + for { + i := bytes.IndexByte(s, '<') + if i == -1 || i+1 == len(s) { + return c, nil + } + i++ + if s[i] == '/' { + if i+1 == len(s) { + return c, nil } + i++ } + j := eatTagName(s, i) + if j != i { + // We've found an HTML tag. + return context{state: stateTag}, s[j:] + } + s = s[j:] } + panic("unreachable") +} - htmlEscapeCommand := parse.CommandNode{ - NodeType: parse.NodeCommand, - Args: []parse.Node{parse.NewIdentifier("html")}, +// tTag is the context transition function for the tag state. +func tTag(c context, s []byte) (context, []byte) { + // Skip to the end tag, if there is one. + i := bytes.IndexByte(s, '>') + if i != -1 { + return context{state: stateText}, s[i+1:] } - node.Pipe.Cmds = append(node.Pipe.Cmds, &htmlEscapeCommand) + // Otherwise, find the attribute name. + i = eatWhiteSpace(s, 0) + attrStart, i := i, eatAttrName(s, i) + if i == len(s) { + return context{state: stateTag}, nil + } + state := stateAttr + if urlAttr[strings.ToLower(string(s[attrStart:i]))] { + state = stateURL + } + + // Consume the "=". + i = eatWhiteSpace(s, i) + if i == len(s) || s[i] != '=' { + return context{state: stateTag}, s[i:] + } + i = eatWhiteSpace(s, i+1) + + // Find the delimiter. + if i == len(s) { + return context{state: state, delim: delimSpaceOrTagEnd}, nil + } + switch s[i] { + case '\'': + return context{state: state, delim: delimSingleQuote}, s[i+1:] + case '"': + return context{state: state, delim: delimDoubleQuote}, s[i+1:] + } + + // TODO: This shouldn't be an error: `", - G: "", - H: "", - A: []string{"", ""}, - E: []string{}, -} - -type testCase struct { - name string - input string - output string -} +func TestEscape(t *testing.T) { + var data = struct { + F, T bool + C, G, H string + A, E []string + }{ + F: false, + T: true, + C: "", + G: "", + H: "", + A: []string{"", ""}, + E: []string{}, + } -var testCases = []testCase{ - {"if", "{{if .T}}Hello{{end}}, {{.C}}!", "Hello, <Cincinatti>!"}, - {"else", "{{if .F}}{{.H}}{{else}}{{.G}}{{end}}!", "<Goodbye>!"}, - {"overescaping", "Hello, {{.C | html}}!", "Hello, <Cincinatti>!"}, - {"assignment", "{{if $x := .H}}{{$x}}{{end}}", "<Hello>"}, - {"withBody", "{{with .H}}{{.}}{{end}}", "<Hello>"}, - {"withElse", "{{with .E}}{{.}}{{else}}{{.H}}{{end}}", "<Hello>"}, - {"rangeBody", "{{range .A}}{{.}}{{end}}", "<a><b>"}, - {"rangeElse", "{{range .E}}{{.}}{{else}}{{.H}}{{end}}", "<Hello>"}, - {"nonStringValue", "{{.T}}", "true"}, - {"constant", ``, ``}, -} + var testCases = []struct { + name string + input string + output string + }{ + { + "if", + "{{if .T}}Hello{{end}}, {{.C}}!", + "Hello, <Cincinatti>!", + }, + { + "else", + "{{if .F}}{{.H}}{{else}}{{.G}}{{end}}!", + "<Goodbye>!", + }, + { + "overescaping", + "Hello, {{.C | html}}!", + "Hello, <Cincinatti>!", + }, + { + "assignment", + "{{if $x := .H}}{{$x}}{{end}}", + "<Hello>", + }, + { + "withBody", + "{{with .H}}{{.}}{{end}}", + "<Hello>", + }, + { + "withElse", + "{{with .E}}{{.}}{{else}}{{.H}}{{end}}", + "<Hello>", + }, + { + "rangeBody", + "{{range .A}}{{.}}{{end}}", + "<a><b>", + }, + { + "rangeElse", + "{{range .E}}{{.}}{{else}}{{.H}}{{end}}", + "<Hello>", + }, + { + "nonStringValue", + "{{.T}}", + "true", + }, + { + // TODO: Make sure the URL escaper escapes single quotes so it can + // be embedded in single quoted URI attributes and CSS url(...) + // constructs. Single quotes are reserved in URLs, but are only used + // in the obsolete "mark" rule in an appendix in RFC 3986 so can be + // safely encoded. + "constant", + ``, + ``, + }, + } -func TestAutoesc(t *testing.T) { - for _, testCase := range testCases { - name := testCase.name - tmpl := template.New(name) - tmpl, err := tmpl.Parse(testCase.input) + for _, tc := range testCases { + tmpl, err := template.New(tc.name).Parse(tc.input) if err != nil { - t.Errorf("%s: failed to parse template: %s", name, err) + t.Errorf("%s: template parsing failed: %s", tc.name, err) continue } - Escape(tmpl) + b := new(bytes.Buffer) + if err = tmpl.Execute(b, data); err != nil { + t.Errorf("%s: template execution failed: %s", tc.name, err) + continue + } + if w, g := tc.output, b.String(); w != g { + t.Errorf("%s: escaped output: want %q got %q", tc.name, w, g) + continue + } + } +} - buffer := new(bytes.Buffer) +func TestErrors(t *testing.T) { + var testCases = []struct { + input string + err string + }{ + // Non-error cases. + { + "{{if .Cond}}{{else}}{{end}}", + "", + }, + { + "{{if .Cond}}{{end}}", + "", + }, + { + "{{if .Cond}}{{else}}{{end}}", + "", + }, + { + "{{with .Cond}}