go/doc: simplify and robustify link detection logic

author Joe Tsai <joetsai@digital-static.net>

Fri, 16 Feb 2018 22:52:10 +0000 (14:52 -0800)

committer Robert Griesemer <gri@golang.org>

Fri, 14 Dec 2018 00:41:25 +0000 (00:41 +0000)
author Joe Tsai <joetsai@digital-static.net>
Fri, 16 Feb 2018 22:52:10 +0000 (14:52 -0800)
committer Robert Griesemer <gri@golang.org>
Fri, 14 Dec 2018 00:41:25 +0000 (00:41 +0000)
diff --git a/src/go/doc/comment.go b/src/go/doc/comment.go

index d2c026ea70290de8faf2064b622e73ff5f38c7ec..0ec42643fdf82fa5f3c5fba537602b70dfc83fae 100644 (file)
--- a/src/go/doc/comment.go
+++ b/src/go/doc/comment.go
@@ -54,7 +54,7 @@ const (
         identRx = `[\pL_][\pL_0-9]*`
  
         // Regexp for URLs
-       // Match parens, and check in pairedParensPrefixLen for balance - see #5043
+       // Match parens, and check later for balance - see #5043, #22285
         // Match .,:;?! within path, but not at end - see #18139, #16565
         // This excludes some rare yet valid urls ending in common punctuation
         // in order to allow sentences ending in URLs.
@@ -86,29 +86,6 @@ var (
         html_endh   = []byte("</h3>\n")
  )
  
-// pairedParensPrefixLen returns the length of the longest prefix of s containing paired parentheses.
-func pairedParensPrefixLen(s string) int {
-       parens := 0
-       l := len(s)
-       for i, ch := range s {
-               switch ch {
-               case '(':
-                       if parens == 0 {
-                               l = i
-                       }
-                       parens++
-               case ')':
-                       parens--
-                       if parens == 0 {
-                               l = len(s)
-                       } else if parens < 0 {
-                               return i
-                       }
-               }
-       }
-       return l
-}
-
  // Emphasize and escape a line of text for HTML. URLs are converted into links;
  // if the URL also appears in the words map, the link is taken from the map (if
  // the corresponding map value is the empty string, the URL is not converted
@@ -128,13 +105,27 @@ func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
                 // write text before match
                 commentEscape(w, line[0:m[0]], nice)
  
-               // adjust match if necessary
+               // adjust match for URLs
                 match := line[m[0]:m[1]]
-               if n := pairedParensPrefixLen(match); n < len(match) {
-                       // match contains unpaired parentheses (rare);
-                       // redo matching with shortened line for correct indices
-                       m = matchRx.FindStringSubmatchIndex(line[:m[0]+n])
-                       match = match[:n]
+               if strings.Contains(match, "://") {
+                       m0, m1 := m[0], m[1]
+                       for _, s := range []string{"()", "{}", "[]"} {
+                               open, close := s[:1], s[1:] // E.g., "(" and ")"
+                               // require opening parentheses before closing parentheses (#22285)
+                               if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) {
+                                       m1 = m0 + i
+                                       match = line[m0:m1]
+                               }
+                               // require balanced pairs of parentheses (#5043)
+                               for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ {
+                                       m1 = strings.LastIndexAny(line[:m1], s)
+                                       match = line[m0:m1]
+                               }
+                       }
+                       if m1 != m[1] {
+                               // redo matching with shortened line for correct indices
+                               m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)])
+                       }
                 }
  
                 // analyze match
diff --git a/src/go/doc/comment_test.go b/src/go/doc/comment_test.go

index 1e6cf84cdfb968470132ea62fbfa4e44d06a799d..e0adeb2f5cf107d50c4d291cc998377d3db6672d 100644 (file)
--- a/src/go/doc/comment_test.go
+++ b/src/go/doc/comment_test.go
@@ -151,6 +151,7 @@ func TestToText(t *testing.T) {
  var emphasizeTests = []struct {
         in, out string
  }{
+       {"", ""},
         {"http://[::1]:8080/foo.txt", `<a href="http://[::1]:8080/foo.txt">http://[::1]:8080/foo.txt</a>`},
         {"before (https://www.google.com) after", `before (<a href="https://www.google.com">https://www.google.com</a>) after`},
         {"before https://www.google.com:30/x/y/z:b::c. After", `before <a href="https://www.google.com:30/x/y/z:b::c">https://www.google.com:30/x/y/z:b::c</a>. After`},
@@ -169,7 +170,13 @@ var emphasizeTests = []struct {
         {"Hello http://example.com/%2f/ /world.", `Hello <a href="http://example.com/%2f/">http://example.com/%2f/</a> /world.`},
         {"Lorem http: ipsum //host/path", "Lorem http: ipsum //host/path"},
         {"javascript://is/not/linked", "javascript://is/not/linked"},
+       {"http://foo", `<a href="http://foo">http://foo</a>`},
+       {"art by [[https://www.example.com/person/][Person Name]]", `art by [[<a href="https://www.example.com/person/">https://www.example.com/person/</a>][Person Name]]`},
+       {"please visit (http://golang.org/)", `please visit (<a href="http://golang.org/">http://golang.org/</a>)`},
+       {"please visit http://golang.org/hello())", `please visit <a href="http://golang.org/hello()">http://golang.org/hello()</a>)`},
         {"http://git.qemu.org/?p=qemu.git;a=blob;f=qapi-schema.json;hb=HEAD", `<a href="http://git.qemu.org/?p=qemu.git;a=blob;f=qapi-schema.json;hb=HEAD">http://git.qemu.org/?p=qemu.git;a=blob;f=qapi-schema.json;hb=HEAD</a>`},
+       {"https://foo.bar/bal/x(])", `<a href="https://foo.bar/bal/x(">https://foo.bar/bal/x(</a>])`}, // inner ] causes (]) to be cut off from URL
+       {"foo [ http://bar(])", `foo [ <a href="http://bar(">http://bar(</a>])`},                      // outer [ causes ]) to be cut off from URL
  }
  
  func TestEmphasize(t *testing.T) {
@@ -183,37 +190,6 @@ func TestEmphasize(t *testing.T) {
         }
  }
  
-var pairedParensPrefixLenTests = []struct {
-       in, out string
-}{
-       {"", ""},
-       {"foo", "foo"},
-       {"()", "()"},
-       {"foo()", "foo()"},
-       {"foo()()()", "foo()()()"},
-       {"foo()((()()))", "foo()((()()))"},
-       {"foo()((()()))bar", "foo()((()()))bar"},
-       {"foo)", "foo"},
-       {"foo))", "foo"},
-       {"foo)))))", "foo"},
-       {"(foo", ""},
-       {"((foo", ""},
-       {"(((((foo", ""},
-       {"(foo)", "(foo)"},
-       {"((((foo))))", "((((foo))))"},
-       {"foo()())", "foo()()"},
-       {"foo((()())", "foo"},
-       {"foo((()())) (() foo ", "foo((()())) "},
-}
-
-func TestPairedParensPrefixLen(t *testing.T) {
-       for i, tt := range pairedParensPrefixLenTests {
-               if out := tt.in[:pairedParensPrefixLen(tt.in)]; out != tt.out {
-                       t.Errorf("#%d: mismatch\nhave: %q\nwant: %q", i, out, tt.out)
-               }
-       }
-}
-
  func TestCommentEscape(t *testing.T) {
         commentTests := []struct {
                 in, out string
author	Joe Tsai <joetsai@digital-static.net>
	Fri, 16 Feb 2018 22:52:10 +0000 (14:52 -0800)
committer	Robert Griesemer <gri@golang.org>
	Fri, 14 Dec 2018 00:41:25 +0000 (00:41 +0000)
src/go/doc/comment.go		patch \| blob \| history
src/go/doc/comment_test.go		patch \| blob \| history