From: Joe Tsai Date: Fri, 16 Feb 2018 22:52:10 +0000 (-0800) Subject: go/doc: simplify and robustify link detection logic X-Git-Tag: go1.12beta1~56 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=190a5f8fd2f360c22c11b290796ae7fc0fa5607c;p=gostls13.git go/doc: simplify and robustify link detection logic To fix #5043, we added logic to allow balanced pairs of parenthesis so that we could match URLs like: http://example.com/some_resource(foo) Howewer, such logic breaks when parsing something like the following: art by [https://example.com/person][Person Name]]. such that the following is considered the link: https://example.com/person][Person Since the logic added in #5043 was just a heuristic, we adjust the heuristic that in addition to requiring balanced pairs, the first parenthesis must be an opening one. For further robustness, we apply this heuristic to parenthesis, braces, and brackets. Fixes #22285 Change-Id: I23b728a644e35ce3995b05a79129cad2c1e3b1ce Reviewed-on: https://go-review.googlesource.com/c/94876 Run-TryBot: Robert Griesemer TryBot-Result: Gobot Gobot Reviewed-by: Robert Griesemer --- diff --git a/src/go/doc/comment.go b/src/go/doc/comment.go index d2c026ea70..0ec42643fd 100644 --- a/src/go/doc/comment.go +++ b/src/go/doc/comment.go @@ -54,7 +54,7 @@ const ( identRx = `[\pL_][\pL_0-9]*` // Regexp for URLs - // Match parens, and check in pairedParensPrefixLen for balance - see #5043 + // Match parens, and check later for balance - see #5043, #22285 // Match .,:;?! within path, but not at end - see #18139, #16565 // This excludes some rare yet valid urls ending in common punctuation // in order to allow sentences ending in URLs. @@ -86,29 +86,6 @@ var ( html_endh = []byte("\n") ) -// pairedParensPrefixLen returns the length of the longest prefix of s containing paired parentheses. -func pairedParensPrefixLen(s string) int { - parens := 0 - l := len(s) - for i, ch := range s { - switch ch { - case '(': - if parens == 0 { - l = i - } - parens++ - case ')': - parens-- - if parens == 0 { - l = len(s) - } else if parens < 0 { - return i - } - } - } - return l -} - // Emphasize and escape a line of text for HTML. URLs are converted into links; // if the URL also appears in the words map, the link is taken from the map (if // the corresponding map value is the empty string, the URL is not converted @@ -128,13 +105,27 @@ func emphasize(w io.Writer, line string, words map[string]string, nice bool) { // write text before match commentEscape(w, line[0:m[0]], nice) - // adjust match if necessary + // adjust match for URLs match := line[m[0]:m[1]] - if n := pairedParensPrefixLen(match); n < len(match) { - // match contains unpaired parentheses (rare); - // redo matching with shortened line for correct indices - m = matchRx.FindStringSubmatchIndex(line[:m[0]+n]) - match = match[:n] + if strings.Contains(match, "://") { + m0, m1 := m[0], m[1] + for _, s := range []string{"()", "{}", "[]"} { + open, close := s[:1], s[1:] // E.g., "(" and ")" + // require opening parentheses before closing parentheses (#22285) + if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) { + m1 = m0 + i + match = line[m0:m1] + } + // require balanced pairs of parentheses (#5043) + for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ { + m1 = strings.LastIndexAny(line[:m1], s) + match = line[m0:m1] + } + } + if m1 != m[1] { + // redo matching with shortened line for correct indices + m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)]) + } } // analyze match diff --git a/src/go/doc/comment_test.go b/src/go/doc/comment_test.go index 1e6cf84cdf..e0adeb2f5c 100644 --- a/src/go/doc/comment_test.go +++ b/src/go/doc/comment_test.go @@ -151,6 +151,7 @@ func TestToText(t *testing.T) { var emphasizeTests = []struct { in, out string }{ + {"", ""}, {"http://[::1]:8080/foo.txt", `http://[::1]:8080/foo.txt`}, {"before (https://www.google.com) after", `before (https://www.google.com) after`}, {"before https://www.google.com:30/x/y/z:b::c. After", `before https://www.google.com:30/x/y/z:b::c. After`}, @@ -169,7 +170,13 @@ var emphasizeTests = []struct { {"Hello http://example.com/%2f/ /world.", `Hello http://example.com/%2f/ /world.`}, {"Lorem http: ipsum //host/path", "Lorem http: ipsum //host/path"}, {"javascript://is/not/linked", "javascript://is/not/linked"}, + {"http://foo", `http://foo`}, + {"art by [[https://www.example.com/person/][Person Name]]", `art by [[https://www.example.com/person/][Person Name]]`}, + {"please visit (http://golang.org/)", `please visit (http://golang.org/)`}, + {"please visit http://golang.org/hello())", `please visit http://golang.org/hello())`}, {"http://git.qemu.org/?p=qemu.git;a=blob;f=qapi-schema.json;hb=HEAD", `http://git.qemu.org/?p=qemu.git;a=blob;f=qapi-schema.json;hb=HEAD`}, + {"https://foo.bar/bal/x(])", `https://foo.bar/bal/x(])`}, // inner ] causes (]) to be cut off from URL + {"foo [ http://bar(])", `foo [ http://bar(])`}, // outer [ causes ]) to be cut off from URL } func TestEmphasize(t *testing.T) { @@ -183,37 +190,6 @@ func TestEmphasize(t *testing.T) { } } -var pairedParensPrefixLenTests = []struct { - in, out string -}{ - {"", ""}, - {"foo", "foo"}, - {"()", "()"}, - {"foo()", "foo()"}, - {"foo()()()", "foo()()()"}, - {"foo()((()()))", "foo()((()()))"}, - {"foo()((()()))bar", "foo()((()()))bar"}, - {"foo)", "foo"}, - {"foo))", "foo"}, - {"foo)))))", "foo"}, - {"(foo", ""}, - {"((foo", ""}, - {"(((((foo", ""}, - {"(foo)", "(foo)"}, - {"((((foo))))", "((((foo))))"}, - {"foo()())", "foo()()"}, - {"foo((()())", "foo"}, - {"foo((()())) (() foo ", "foo((()())) "}, -} - -func TestPairedParensPrefixLen(t *testing.T) { - for i, tt := range pairedParensPrefixLenTests { - if out := tt.in[:pairedParensPrefixLen(tt.in)]; out != tt.out { - t.Errorf("#%d: mismatch\nhave: %q\nwant: %q", i, out, tt.out) - } - } -} - func TestCommentEscape(t *testing.T) { commentTests := []struct { in, out string