exp/template/html: simplify transition functions

author Mike Samuel <mikesamuel@gmail.com>

Thu, 22 Sep 2011 02:04:41 +0000 (19:04 -0700)

committer Mike Samuel <mikesamuel@gmail.com>

Thu, 22 Sep 2011 02:04:41 +0000 (19:04 -0700)
author Mike Samuel <mikesamuel@gmail.com>
Thu, 22 Sep 2011 02:04:41 +0000 (19:04 -0700)
committer Mike Samuel <mikesamuel@gmail.com>
Thu, 22 Sep 2011 02:04:41 +0000 (19:04 -0700)
diff --git a/src/pkg/exp/template/html/context.go b/src/pkg/exp/template/html/context.go

index 57d44938ca66169305fb71a06f1beb5b0d0e307f..de073f134a388d49ef489654c8c12a7c9af07ded 100644 (file)
--- a/src/pkg/exp/template/html/context.go
+++ b/src/pkg/exp/template/html/context.go
@@ -175,6 +175,15 @@ func isComment(s state) bool {
         return false
  }
  
+// isInTag return whether s occurs solely inside an HTML tag.
+func isInTag(s state) bool {
+       switch s {
+       case stateTag, stateAttrName, stateAfterName, stateBeforeValue, stateAttr:
+               return true
+       }
+       return false
+}
+
  // delim is the delimiter that will end the current HTML attribute.
  type delim uint8
  
diff --git a/src/pkg/exp/template/html/escape.go b/src/pkg/exp/template/html/escape.go

index 050746c1b2a46e5fce93f53e5e4c5b8058cbf93a..28019f252522c84053c76e7f1b6c6f45a102ca1f 100644 (file)
--- a/src/pkg/exp/template/html/escape.go
+++ b/src/pkg/exp/template/html/escape.go
@@ -583,7 +583,14 @@ func (e *escaper) escapeText(c context, n *parse.TextNode) context {
  // s, then returns the context after those tokens and the unprocessed suffix.
  func contextAfterText(c context, s []byte) (context, int) {
         if c.delim == delimNone {
-               return transitionFunc[c.state](c, s)
+               c1, i := tSpecialTagEnd(c, s)
+               if i == 0 {
+                       // A special end tag (`</script>`) has been seen and
+                       // all content preceding it has been consumed.
+                       return c1, 0
+               }
+               // Consider all content up to any end tag.
+               return transitionFunc[c.state](c, s[:i])
         }
  
         i := bytes.IndexAny(s, delimEnds[c.delim])
diff --git a/src/pkg/exp/template/html/escape_test.go b/src/pkg/exp/template/html/escape_test.go

index cf1c8280028c9d5e2fc1969e9a2b6635e48202da..84bf6b7a4ab67174a0e03e17407da3977109c03d 100644 (file)
--- a/src/pkg/exp/template/html/escape_test.go
+++ b/src/pkg/exp/template/html/escape_test.go
@@ -814,7 +814,7 @@ func TestErrors(t *testing.T) {
                 },
                 {
                         `<a onclick='alert(/x+\`,
-                       `unfinished escape sequence in JS regexp: "x+\\"`,
+                       `unfinished escape sequence in JS string: "x+\\"`,
                 },
                 {
                         `<a onclick="/foo[\]/`,
diff --git a/src/pkg/exp/template/html/html.go b/src/pkg/exp/template/html/html.go

index 27ef65229a60e04d9bb0f031188dd50540caa61e..3924b193db0a19f7f8096afddf3a5d2123079fcd 100644 (file)
--- a/src/pkg/exp/template/html/html.go
+++ b/src/pkg/exp/template/html/html.go
@@ -165,12 +165,17 @@ func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
  // For example, `<b>&iexcl;Hi!</b> <script>...</script>` -> `&iexcl;Hi! `.
  func stripTags(html string) string {
         var b bytes.Buffer
-       s, c, i := []byte(html), context{}, 0
+       s, c, i, allText := []byte(html), context{}, 0, true
         // Using the transition funcs helps us avoid mangling
         // `<div title="1>2">` or `I <3 Ponies!`.
         for i != len(s) {
                 if c.delim == delimNone {
-                       d, nread := transitionFunc[c.state](c, s[i:])
+                       st := c.state
+                       // Use RCDATA instead of parsing into JS or CSS styles.
+                       if c.element != elementNone && !isInTag(st) {
+                               st = stateRCDATA
+                       }
+                       d, nread := transitionFunc[st](c, s[i:])
                         i1 := i + nread
                         if c.state == stateText || c.state == stateRCDATA {
                                 // Emit text up to the start of the tag or comment.
@@ -184,6 +189,8 @@ func stripTags(html string) string {
                                         }
                                 }
                                 b.Write(s[i:j])
+                       } else {
+                               allText = false
                         }
                         c, i = d, i1
                         continue
@@ -198,10 +205,9 @@ func stripTags(html string) string {
                 }
                 c, i = context{state: stateTag, element: c.element}, i1
         }
-       if c.state == stateText {
-               if b.Len() == 0 {
-                       return html
-               }
+       if allText {
+               return html
+       } else if c.state == stateText || c.state == stateRCDATA {
                 b.Write(s[i:])
         }
         return b.String()
diff --git a/src/pkg/exp/template/html/html_test.go b/src/pkg/exp/template/html/html_test.go

index 2866fdd0ce1b15a9c4ddc04478804b7b4ab6fb53..e178d0f27e5f678764da08f8a5745f41de6b0682 100644 (file)
--- a/src/pkg/exp/template/html/html_test.go
+++ b/src/pkg/exp/template/html/html_test.go
@@ -59,6 +59,7 @@ func TestStripTags(t *testing.T) {
                 {`Foo<script type="text/javascript">alert(1337)</script>Bar`, "FooBar"},
                 {`Foo<div title="1>2">Bar`, "FooBar"},
                 {`I <3 Ponies!`, `I <3 Ponies!`},
+               {`<script>foo()</script>`, ``},
         }
  
         for _, test := range tests {
diff --git a/src/pkg/exp/template/html/transition.go b/src/pkg/exp/template/html/transition.go

index 0274d9e7101dd5670f905fc251b3d9b225fc232e..3be3a01a8aecc62f2528fa801f811405c93abd2a 100644 (file)
--- a/src/pkg/exp/template/html/transition.go
+++ b/src/pkg/exp/template/html/transition.go
@@ -27,9 +27,9 @@ var transitionFunc = [...]func(context, []byte) (context, int){
         stateAttr:        tAttr,
         stateURL:         tURL,
         stateJS:          tJS,
-       stateJSDqStr:     tJSStr,
-       stateJSSqStr:     tJSStr,
-       stateJSRegexp:    tJSRegexp,
+       stateJSDqStr:     tJSDelimited,
+       stateJSSqStr:     tJSDelimited,
+       stateJSRegexp:    tJSDelimited,
         stateJSBlockCmt:  tBlockCmt,
         stateJSLineCmt:   tLineCmt,
         stateCSS:         tCSS,
@@ -57,14 +57,18 @@ func tText(c context, s []byte) (context, int) {
                         return context{state: stateHTMLCmt}, i + 4
                 }
                 i++
+               end := false
                 if s[i] == '/' {
                         if i+1 == len(s) {
                                 return c, len(s)
                         }
-                       i++
+                       end, i = true, i+1
                 }
                 j, e := eatTagName(s, i)
                 if j != i {
+                       if end {
+                               e = elementNone
+                       }
                         // We've found an HTML tag.
                         return context{state: stateTag, element: e}, j
                 }
@@ -122,10 +126,9 @@ func tAttrName(c context, s []byte) (context, int) {
         i, err := eatAttrName(s, 0)
         if err != nil {
                 return context{state: stateError, err: err}, len(s)
-       } else if i == len(s) {
-               return c, len(s)
+       } else if i != len(s) {
+               c.state = stateAfterName
         }
-       c.state = stateAfterName
         return c, i
  }
  
@@ -172,8 +175,7 @@ func tBeforeValue(c context, s []byte) (context, int) {
  
  // tHTMLCmt is the context transition function for stateHTMLCmt.
  func tHTMLCmt(c context, s []byte) (context, int) {
-       i := bytes.Index(s, commentEnd)
-       if i != -1 {
+       if i := bytes.Index(s, commentEnd); i != -1 {
                 return context{}, i + 3
         }
         return c, len(s)
@@ -192,10 +194,8 @@ var specialTagEndMarkers = [...]string{
  // element states.
  func tSpecialTagEnd(c context, s []byte) (context, int) {
         if c.element != elementNone {
-               end := specialTagEndMarkers[c.element]
-               i := strings.Index(strings.ToLower(string(s)), end)
-               if i != -1 {
-                       return context{state: stateTag}, i + len(end)
+               if i := strings.Index(strings.ToLower(string(s)), specialTagEndMarkers[c.element]); i != -1 {
+                       return context{}, i
                 }
         }
         return c, len(s)
@@ -220,10 +220,6 @@ func tURL(c context, s []byte) (context, int) {
  
  // tJS is the context transition function for the JS state.
  func tJS(c context, s []byte) (context, int) {
-       if d, i := tSpecialTagEnd(c, s); i != len(s) {
-               return d, i
-       }
-
         i := bytes.IndexAny(s, `"'/`)
         if i == -1 {
                 // Entire input is non string, comment, regexp tokens.
@@ -258,64 +254,30 @@ func tJS(c context, s []byte) (context, int) {
         return c, i + 1
  }
  
-// tJSStr is the context transition function for the JS string states.
-func tJSStr(c context, s []byte) (context, int) {
-       if d, i := tSpecialTagEnd(c, s); i != len(s) {
-               return d, i
-       }
-
-       quoteAndEsc := `\"`
-       if c.state == stateJSSqStr {
-               quoteAndEsc = `\'`
-       }
-
-       k := 0
-       for {
-               i := k + bytes.IndexAny(s[k:], quoteAndEsc)
-               if i < k {
-                       return c, len(s)
-               }
-               if s[i] == '\\' {
-                       i++
-                       if i == len(s) {
-                               return context{
-                                       state: stateError,
-                                       err:   errorf(ErrPartialEscape, 0, "unfinished escape sequence in JS string: %q", s),
-                               }, len(s)
-                       }
-               } else {
-                       c.state, c.jsCtx = stateJS, jsCtxDivOp
-                       return c, i + 1
-               }
-               k = i + 1
-       }
-       panic("unreachable")
-}
-
-// tJSRegexp is the context transition function for the /RegExp/ literal state.
-func tJSRegexp(c context, s []byte) (context, int) {
-       if d, i := tSpecialTagEnd(c, s); i != len(s) {
-               return d, i
+// tJSDelimited is the context transition function for the JS string and regexp
+// states.
+func tJSDelimited(c context, s []byte) (context, int) {
+       specials := `\"`
+       switch c.state {
+       case stateJSSqStr:
+               specials = `\'`
+       case stateJSRegexp:
+               specials = `\/[]`
         }
  
         k, inCharset := 0, false
         for {
-               i := k + bytes.IndexAny(s[k:], `\/[]`)
+               i := k + bytes.IndexAny(s[k:], specials)
                 if i < k {
                         break
                 }
                 switch s[i] {
-               case '/':
-                       if !inCharset {
-                               c.state, c.jsCtx = stateJS, jsCtxDivOp
-                               return c, i + 1
-                       }
                 case '\\':
                         i++
                         if i == len(s) {
                                 return context{
                                         state: stateError,
-                                       err:   errorf(ErrPartialEscape, 0, "unfinished escape sequence in JS regexp: %q", s),
+                                       err:   errorf(ErrPartialEscape, 0, "unfinished escape sequence in JS string: %q", s),
                                 }, len(s)
                         }
                 case '[':
@@ -323,7 +285,11 @@ func tJSRegexp(c context, s []byte) (context, int) {
                 case ']':
                         inCharset = false
                 default:
-                       panic("unreachable")
+                       // end delimiter
+                       if !inCharset {
+                               c.state, c.jsCtx = stateJS, jsCtxDivOp
+                               return c, i + 1
+                       }
                 }
                 k = i + 1
         }
@@ -344,9 +310,6 @@ var blockCommentEnd = []byte("*/")
  
  // tBlockCmt is the context transition function for /*comment*/ states.
  func tBlockCmt(c context, s []byte) (context, int) {
-       if d, i := tSpecialTagEnd(c, s); i != len(s) {
-               return d, i
-       }
         i := bytes.Index(s, blockCommentEnd)
         if i == -1 {
                 return c, len(s)
@@ -364,9 +327,6 @@ func tBlockCmt(c context, s []byte) (context, int) {
  
  // tLineCmt is the context transition function for //comment states.
  func tLineCmt(c context, s []byte) (context, int) {
-       if d, i := tSpecialTagEnd(c, s); i != len(s) {
-               return d, i
-       }
         var lineTerminators string
         var endState state
         switch c.state {
@@ -400,10 +360,6 @@ func tLineCmt(c context, s []byte) (context, int) {
  
  // tCSS is the context transition function for the CSS state.
  func tCSS(c context, s []byte) (context, int) {
-       if d, i := tSpecialTagEnd(c, s); i != len(s) {
-               return d, i
-       }
-
         // CSS quoted strings are almost never used except for:
         // (1) URLs as in background: "/foo.png"
         // (2) Multiword font-names as in font-family: "Times New Roman"
@@ -478,10 +434,6 @@ func tCSS(c context, s []byte) (context, int) {
  
  // tCSSStr is the context transition function for the CSS string and URL states.
  func tCSSStr(c context, s []byte) (context, int) {
-       if d, i := tSpecialTagEnd(c, s); i != len(s) {
-               return d, i
-       }
-
         var endAndEsc string
         switch c.state {
         case stateCSSDqStr, stateCSSDqURL:
author	Mike Samuel <mikesamuel@gmail.com>
	Thu, 22 Sep 2011 02:04:41 +0000 (19:04 -0700)
committer	Mike Samuel <mikesamuel@gmail.com>
	Thu, 22 Sep 2011 02:04:41 +0000 (19:04 -0700)
src/pkg/exp/template/html/context.go		patch \| blob \| history
src/pkg/exp/template/html/escape.go		patch \| blob \| history
src/pkg/exp/template/html/escape_test.go		patch \| blob \| history
src/pkg/exp/template/html/html.go		patch \| blob \| history
src/pkg/exp/template/html/html_test.go		patch \| blob \| history
src/pkg/exp/template/html/transition.go		patch \| blob \| history