]> Cypherpunks repositories - gostls13.git/commitdiff
go/doc: allow : in godoc links
authorKenny Grant <kennygrant@gmail.com>
Sun, 18 Dec 2016 07:24:58 +0000 (07:24 +0000)
committerBrad Fitzpatrick <bradfitz@golang.org>
Sat, 18 Feb 2017 06:35:36 +0000 (06:35 +0000)
The emphasize function used a complex regexp to find URLs, which
truncated some types of URL and did not match others.
This has been simplified and adjusted to allow valid punctuation
like :: or ! in the path part and :[] in the host part.
Comments were added to clarify what this regexp allows.
The path part matches query and fragment also so document this.
Removed news, telnet, wais, and prospero protocols.

Tests were added for:
 IPV6 URLs
 URLs surrounded by brackets
 URLs containing ::
 URLs containing :;!- in the path

In order to allow punctuation and yet preserve current behaviour,
URLs are not permitted to end in .,:;?! to allow the use of
normal punctuation surrounding URLs in comments.

Fixes #18139

Change-Id: I38b2d7a85fe0d171e4bf4aac420f8c2d3ced8a2f
Reviewed-on: https://go-review.googlesource.com/37192
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/go/doc/comment.go
src/go/doc/comment_test.go

index 15e034b6df8b7d07bd55b7f5fa4825e92959b0f1..4228e8cd9c5573000984b0d38c581561fa88ea62 100644 (file)
@@ -48,12 +48,19 @@ const (
        identRx = `[\pL_][\pL_0-9]*`
 
        // Regexp for URLs
-       protocol = `https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero`
-       hostPart = `[a-zA-Z0-9_@\-]+`
-       filePart = `[a-zA-Z0-9_?%#~&/\-+=()]+` // parentheses may not be matching; see pairedParensPrefixLen
-       urlRx    = `(` + protocol + `)://` +   // http://
-               hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
-               filePart + `([:.,;]` + filePart + `)*`
+       // Match parens, and check in pairedParensPrefixLen for balance - see #5043
+       // Match .,:;?! within path, but not at end - see #18139, #16565
+       // This excludes some rare yet valid urls ending in common punctuation
+       // in order to allow sentences ending in URLs.
+
+       // protocol (required) e.g. http
+       protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
+       // host (required) e.g. www.example.com or [::1]:8080
+       hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
+       // path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
+       pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`
+
+       urlRx = protoPart + `://` + hostPart + pathPart
 )
 
 var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
index 76dfbeac796b8da3247438baa076bc6c0fb8421b..0523ab899ee345e00d5cab7f5fef8cca7277f612 100644 (file)
@@ -150,6 +150,12 @@ func TestToText(t *testing.T) {
 var emphasizeTests = []struct {
        in, out string
 }{
+       {"http://[::1]:8080/foo.txt", `<a href="http://[::1]:8080/foo.txt">http://[::1]:8080/foo.txt</a>`},
+       {"before (https://www.google.com) after", `before (<a href="https://www.google.com">https://www.google.com</a>) after`},
+       {"before https://www.google.com:30/x/y/z:b::c. After", `before <a href="https://www.google.com:30/x/y/z:b::c">https://www.google.com:30/x/y/z:b::c</a>. After`},
+       {"http://www.google.com/path/:;!-/?query=%34b#093124", `<a href="http://www.google.com/path/:;!-/?query=%34b#093124">http://www.google.com/path/:;!-/?query=%34b#093124</a>`},
+       {"http://www.google.com/path/:;!-/?query=%34bar#093124", `<a href="http://www.google.com/path/:;!-/?query=%34bar#093124">http://www.google.com/path/:;!-/?query=%34bar#093124</a>`},
+       {"http://www.google.com/index.html! After", `<a href="http://www.google.com/index.html">http://www.google.com/index.html</a>! After`},
        {"http://www.google.com/", `<a href="http://www.google.com/">http://www.google.com/</a>`},
        {"https://www.google.com/", `<a href="https://www.google.com/">https://www.google.com/</a>`},
        {"http://www.google.com/path.", `<a href="http://www.google.com/path">http://www.google.com/path</a>.`},