go/doc: fix URL matched in ToHTML

author Robert Griesemer <gri@golang.org>

Tue, 8 Apr 2014 20:51:44 +0000 (13:51 -0700)

committer Robert Griesemer <gri@golang.org>

Tue, 8 Apr 2014 20:51:44 +0000 (13:51 -0700)
author Robert Griesemer <gri@golang.org>
Tue, 8 Apr 2014 20:51:44 +0000 (13:51 -0700)
committer Robert Griesemer <gri@golang.org>
Tue, 8 Apr 2014 20:51:44 +0000 (13:51 -0700)
diff --git a/src/pkg/go/doc/comment.go b/src/pkg/go/doc/comment.go

index 274a625cf0d807a74cc5ee0bdff58707c60cdec3..f414ca4090e27dfbdb26774969eecb1853468045 100644 (file)
--- a/src/pkg/go/doc/comment.go
+++ b/src/pkg/go/doc/comment.go
@@ -45,13 +45,13 @@ func commentEscape(w io.Writer, text string, nice bool) {
  
  const (
         // Regexp for Go identifiers
-       identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this
+       identRx = `[\pL_][\pL_0-9]*`
  
         // Regexp for URLs
-       protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):`
+       protocol = `https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero`
         hostPart = `[a-zA-Z0-9_@\-]+`
-       filePart = `[a-zA-Z0-9_?%#~&/\-+=]+`
-       urlRx    = protocol + `//` + // http://
+       filePart = `[a-zA-Z0-9_?%#~&/\-+=()]+` // parentheses may not be matching; see pairedParensPrefixLen
+       urlRx    = `(` + protocol + `)://` +   // http://
                 hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
                 filePart + `([:.,]` + filePart + `)*`
  )
@@ -73,6 +73,29 @@ var (
         html_endh   = []byte("</h3>\n")
  )
  
+// pairedParensPrefixLen returns the length of the longest prefix of s containing paired parentheses.
+func pairedParensPrefixLen(s string) int {
+       parens := 0
+       l := len(s)
+       for i, ch := range s {
+               switch ch {
+               case '(':
+                       if parens == 0 {
+                               l = i
+                       }
+                       parens++
+               case ')':
+                       parens--
+                       if parens == 0 {
+                               l = len(s)
+                       } else if parens < 0 {
+                               return i
+                       }
+               }
+       }
+       return l
+}
+
  // Emphasize and escape a line of text for HTML. URLs are converted into links;
  // if the URL also appears in the words map, the link is taken from the map (if
  // the corresponding map value is the empty string, the URL is not converted
@@ -92,18 +115,26 @@ func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
                 // write text before match
                 commentEscape(w, line[0:m[0]], nice)
  
-               // analyze match
+               // adjust match if necessary
                 match := line[m[0]:m[1]]
+               if n := pairedParensPrefixLen(match); n < len(match) {
+                       // match contains unpaired parentheses (rare);
+                       // redo matching with shortened line for correct indices
+                       m = matchRx.FindStringSubmatchIndex(line[:m[0]+n])
+                       match = match[:n]
+               }
+
+               // analyze match
                 url := ""
                 italics := false
                 if words != nil {
-                       url, italics = words[string(match)]
+                       url, italics = words[match]
                 }
                 if m[2] >= 0 {
                         // match against first parenthesized sub-regexp; must be match against urlRx
                         if !italics {
                                 // no alternative URL in words list, use match instead
-                               url = string(match)
+                               url = match
                         }
                         italics = false // don't italicize URLs
                 }
diff --git a/src/pkg/go/doc/comment_test.go b/src/pkg/go/doc/comment_test.go

index 9f29a61153150e86ede683cb58819a8291d475f5..ad65c2a27f8a09d22237f59287e0a06cd91c4a5c 100644 (file)
--- a/src/pkg/go/doc/comment_test.go
+++ b/src/pkg/go/doc/comment_test.go
@@ -148,13 +148,16 @@ func TestToText(t *testing.T) {
  }
  
  var emphasizeTests = []struct {
-       in  string
-       out string
+       in, out string
  }{
         {"http://www.google.com/", `<a href="http://www.google.com/">http://www.google.com/</a>`},
         {"https://www.google.com/", `<a href="https://www.google.com/">https://www.google.com/</a>`},
         {"http://www.google.com/path.", `<a href="http://www.google.com/path">http://www.google.com/path</a>.`},
+       {"http://en.wikipedia.org/wiki/Camellia_(cipher)", `<a href="http://en.wikipedia.org/wiki/Camellia_(cipher)">http://en.wikipedia.org/wiki/Camellia_(cipher)</a>`},
         {"(http://www.google.com/)", `(<a href="http://www.google.com/">http://www.google.com/</a>)`},
+       {"http://gmail.com)", `<a href="http://gmail.com">http://gmail.com</a>)`},
+       {"((http://gmail.com))", `((<a href="http://gmail.com">http://gmail.com</a>))`},
+       {"http://gmail.com ((http://gmail.com)) ()", `<a href="http://gmail.com">http://gmail.com</a> ((<a href="http://gmail.com">http://gmail.com</a>)) ()`},
         {"Foo bar http://example.com/ quux!", `Foo bar <a href="http://example.com/">http://example.com/</a> quux!`},
         {"Hello http://example.com/%2f/ /world.", `Hello <a href="http://example.com/%2f/">http://example.com/%2f/</a> /world.`},
         {"Lorem http: ipsum //host/path", "Lorem http: ipsum //host/path"},
@@ -171,3 +174,34 @@ func TestEmphasize(t *testing.T) {
                 }
         }
  }
+
+var pairedParensPrefixLenTests = []struct {
+       in, out string
+}{
+       {"", ""},
+       {"foo", "foo"},
+       {"()", "()"},
+       {"foo()", "foo()"},
+       {"foo()()()", "foo()()()"},
+       {"foo()((()()))", "foo()((()()))"},
+       {"foo()((()()))bar", "foo()((()()))bar"},
+       {"foo)", "foo"},
+       {"foo))", "foo"},
+       {"foo)))))", "foo"},
+       {"(foo", ""},
+       {"((foo", ""},
+       {"(((((foo", ""},
+       {"(foo)", "(foo)"},
+       {"((((foo))))", "((((foo))))"},
+       {"foo()())", "foo()()"},
+       {"foo((()())", "foo"},
+       {"foo((()())) (() foo ", "foo((()())) "},
+}
+
+func TestPairedParensPrefixLen(t *testing.T) {
+       for i, tt := range pairedParensPrefixLenTests {
+               if out := tt.in[:pairedParensPrefixLen(tt.in)]; out != tt.out {
+                       t.Errorf("#%d: mismatch\nhave: %q\nwant: %q", i, out, tt.out)
+               }
+       }
+}
author	Robert Griesemer <gri@golang.org>
	Tue, 8 Apr 2014 20:51:44 +0000 (13:51 -0700)
committer	Robert Griesemer <gri@golang.org>
	Tue, 8 Apr 2014 20:51:44 +0000 (13:51 -0700)
src/pkg/go/doc/comment.go		patch \| blob \| history
src/pkg/go/doc/comment_test.go		patch \| blob \| history