]> Cypherpunks repositories - gostls13.git/commitdiff
go/doc: fix URL matched in ToHTML
authorRobert Griesemer <gri@golang.org>
Tue, 8 Apr 2014 20:51:44 +0000 (13:51 -0700)
committerRobert Griesemer <gri@golang.org>
Tue, 8 Apr 2014 20:51:44 +0000 (13:51 -0700)
Permit paired parentheses in URLs such as:

http://en.wikipedia.org/wiki/Camellia_(cipher)

Fixes #5043.

LGTM=rsc
R=rsc
CC=golang-codereviews
https://golang.org/cl/85610043

src/pkg/go/doc/comment.go
src/pkg/go/doc/comment_test.go

index 274a625cf0d807a74cc5ee0bdff58707c60cdec3..f414ca4090e27dfbdb26774969eecb1853468045 100644 (file)
@@ -45,13 +45,13 @@ func commentEscape(w io.Writer, text string, nice bool) {
 
 const (
        // Regexp for Go identifiers
-       identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this
+       identRx = `[\pL_][\pL_0-9]*`
 
        // Regexp for URLs
-       protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):`
+       protocol = `https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero`
        hostPart = `[a-zA-Z0-9_@\-]+`
-       filePart = `[a-zA-Z0-9_?%#~&/\-+=]+`
-       urlRx    = protocol + `//` + // http://
+       filePart = `[a-zA-Z0-9_?%#~&/\-+=()]+` // parentheses may not be matching; see pairedParensPrefixLen
+       urlRx    = `(` + protocol + `)://` +   // http://
                hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
                filePart + `([:.,]` + filePart + `)*`
 )
@@ -73,6 +73,29 @@ var (
        html_endh   = []byte("</h3>\n")
 )
 
+// pairedParensPrefixLen returns the length of the longest prefix of s containing paired parentheses.
+func pairedParensPrefixLen(s string) int {
+       parens := 0
+       l := len(s)
+       for i, ch := range s {
+               switch ch {
+               case '(':
+                       if parens == 0 {
+                               l = i
+                       }
+                       parens++
+               case ')':
+                       parens--
+                       if parens == 0 {
+                               l = len(s)
+                       } else if parens < 0 {
+                               return i
+                       }
+               }
+       }
+       return l
+}
+
 // Emphasize and escape a line of text for HTML. URLs are converted into links;
 // if the URL also appears in the words map, the link is taken from the map (if
 // the corresponding map value is the empty string, the URL is not converted
@@ -92,18 +115,26 @@ func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
                // write text before match
                commentEscape(w, line[0:m[0]], nice)
 
-               // analyze match
+               // adjust match if necessary
                match := line[m[0]:m[1]]
+               if n := pairedParensPrefixLen(match); n < len(match) {
+                       // match contains unpaired parentheses (rare);
+                       // redo matching with shortened line for correct indices
+                       m = matchRx.FindStringSubmatchIndex(line[:m[0]+n])
+                       match = match[:n]
+               }
+
+               // analyze match
                url := ""
                italics := false
                if words != nil {
-                       url, italics = words[string(match)]
+                       url, italics = words[match]
                }
                if m[2] >= 0 {
                        // match against first parenthesized sub-regexp; must be match against urlRx
                        if !italics {
                                // no alternative URL in words list, use match instead
-                               url = string(match)
+                               url = match
                        }
                        italics = false // don't italicize URLs
                }
index 9f29a61153150e86ede683cb58819a8291d475f5..ad65c2a27f8a09d22237f59287e0a06cd91c4a5c 100644 (file)
@@ -148,13 +148,16 @@ func TestToText(t *testing.T) {
 }
 
 var emphasizeTests = []struct {
-       in  string
-       out string
+       in, out string
 }{
        {"http://www.google.com/", `<a href="http://www.google.com/">http://www.google.com/</a>`},
        {"https://www.google.com/", `<a href="https://www.google.com/">https://www.google.com/</a>`},
        {"http://www.google.com/path.", `<a href="http://www.google.com/path">http://www.google.com/path</a>.`},
+       {"http://en.wikipedia.org/wiki/Camellia_(cipher)", `<a href="http://en.wikipedia.org/wiki/Camellia_(cipher)">http://en.wikipedia.org/wiki/Camellia_(cipher)</a>`},
        {"(http://www.google.com/)", `(<a href="http://www.google.com/">http://www.google.com/</a>)`},
+       {"http://gmail.com)", `<a href="http://gmail.com">http://gmail.com</a>)`},
+       {"((http://gmail.com))", `((<a href="http://gmail.com">http://gmail.com</a>))`},
+       {"http://gmail.com ((http://gmail.com)) ()", `<a href="http://gmail.com">http://gmail.com</a> ((<a href="http://gmail.com">http://gmail.com</a>)) ()`},
        {"Foo bar http://example.com/ quux!", `Foo bar <a href="http://example.com/">http://example.com/</a> quux!`},
        {"Hello http://example.com/%2f/ /world.", `Hello <a href="http://example.com/%2f/">http://example.com/%2f/</a> /world.`},
        {"Lorem http: ipsum //host/path", "Lorem http: ipsum //host/path"},
@@ -171,3 +174,34 @@ func TestEmphasize(t *testing.T) {
                }
        }
 }
+
+var pairedParensPrefixLenTests = []struct {
+       in, out string
+}{
+       {"", ""},
+       {"foo", "foo"},
+       {"()", "()"},
+       {"foo()", "foo()"},
+       {"foo()()()", "foo()()()"},
+       {"foo()((()()))", "foo()((()()))"},
+       {"foo()((()()))bar", "foo()((()()))bar"},
+       {"foo)", "foo"},
+       {"foo))", "foo"},
+       {"foo)))))", "foo"},
+       {"(foo", ""},
+       {"((foo", ""},
+       {"(((((foo", ""},
+       {"(foo)", "(foo)"},
+       {"((((foo))))", "((((foo))))"},
+       {"foo()())", "foo()()"},
+       {"foo((()())", "foo"},
+       {"foo((()())) (() foo ", "foo((()())) "},
+}
+
+func TestPairedParensPrefixLen(t *testing.T) {
+       for i, tt := range pairedParensPrefixLenTests {
+               if out := tt.in[:pairedParensPrefixLen(tt.in)]; out != tt.out {
+                       t.Errorf("#%d: mismatch\nhave: %q\nwant: %q", i, out, tt.out)
+               }
+       }
+}