identRx = `[\pL_][\pL_0-9]*`
// Regexp for URLs
- protocol = `https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero`
- hostPart = `[a-zA-Z0-9_@\-]+`
- filePart = `[a-zA-Z0-9_?%#~&/\-+=()]+` // parentheses may not be matching; see pairedParensPrefixLen
- urlRx = `(` + protocol + `)://` + // http://
- hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
- filePart + `([:.,;]` + filePart + `)*`
+ // Match parens, and check in pairedParensPrefixLen for balance - see #5043
+ // Match .,:;?! within path, but not at end - see #18139, #16565
+ // This excludes some rare yet valid urls ending in common punctuation
+ // in order to allow sentences ending in URLs.
+
+ // protocol (required) e.g. http
+ protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
+ // host (required) e.g. www.example.com or [::1]:8080
+ hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
+ // path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
+ pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`
+
+ urlRx = protoPart + `://` + hostPart + pathPart
)
var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
var emphasizeTests = []struct {
in, out string
}{
+ {"http://[::1]:8080/foo.txt", `<a href="http://[::1]:8080/foo.txt">http://[::1]:8080/foo.txt</a>`},
+ {"before (https://www.google.com) after", `before (<a href="https://www.google.com">https://www.google.com</a>) after`},
+ {"before https://www.google.com:30/x/y/z:b::c. After", `before <a href="https://www.google.com:30/x/y/z:b::c">https://www.google.com:30/x/y/z:b::c</a>. After`},
+ {"http://www.google.com/path/:;!-/?query=%34b#093124", `<a href="http://www.google.com/path/:;!-/?query=%34b#093124">http://www.google.com/path/:;!-/?query=%34b#093124</a>`},
+ {"http://www.google.com/path/:;!-/?query=%34bar#093124", `<a href="http://www.google.com/path/:;!-/?query=%34bar#093124">http://www.google.com/path/:;!-/?query=%34bar#093124</a>`},
+ {"http://www.google.com/index.html! After", `<a href="http://www.google.com/index.html">http://www.google.com/index.html</a>! After`},
{"http://www.google.com/", `<a href="http://www.google.com/">http://www.google.com/</a>`},
{"https://www.google.com/", `<a href="https://www.google.com/">https://www.google.com/</a>`},
{"http://www.google.com/path.", `<a href="http://www.google.com/path">http://www.google.com/path</a>.`},