From: Robert Griesemer Date: Tue, 8 Apr 2014 20:51:44 +0000 (-0700) Subject: go/doc: fix URL matched in ToHTML X-Git-Tag: go1.3beta1~161 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=9610b616c6936edde5aa54312c8dc5e182aebf73;p=gostls13.git go/doc: fix URL matched in ToHTML Permit paired parentheses in URLs such as: http://en.wikipedia.org/wiki/Camellia_(cipher) Fixes #5043. LGTM=rsc R=rsc CC=golang-codereviews https://golang.org/cl/85610043 --- diff --git a/src/pkg/go/doc/comment.go b/src/pkg/go/doc/comment.go index 274a625cf0..f414ca4090 100644 --- a/src/pkg/go/doc/comment.go +++ b/src/pkg/go/doc/comment.go @@ -45,13 +45,13 @@ func commentEscape(w io.Writer, text string, nice bool) { const ( // Regexp for Go identifiers - identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this + identRx = `[\pL_][\pL_0-9]*` // Regexp for URLs - protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):` + protocol = `https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero` hostPart = `[a-zA-Z0-9_@\-]+` - filePart = `[a-zA-Z0-9_?%#~&/\-+=]+` - urlRx = protocol + `//` + // http:// + filePart = `[a-zA-Z0-9_?%#~&/\-+=()]+` // parentheses may not be matching; see pairedParensPrefixLen + urlRx = `(` + protocol + `)://` + // http:// hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/ filePart + `([:.,]` + filePart + `)*` ) @@ -73,6 +73,29 @@ var ( html_endh = []byte("\n") ) +// pairedParensPrefixLen returns the length of the longest prefix of s containing paired parentheses. +func pairedParensPrefixLen(s string) int { + parens := 0 + l := len(s) + for i, ch := range s { + switch ch { + case '(': + if parens == 0 { + l = i + } + parens++ + case ')': + parens-- + if parens == 0 { + l = len(s) + } else if parens < 0 { + return i + } + } + } + return l +} + // Emphasize and escape a line of text for HTML. URLs are converted into links; // if the URL also appears in the words map, the link is taken from the map (if // the corresponding map value is the empty string, the URL is not converted @@ -92,18 +115,26 @@ func emphasize(w io.Writer, line string, words map[string]string, nice bool) { // write text before match commentEscape(w, line[0:m[0]], nice) - // analyze match + // adjust match if necessary match := line[m[0]:m[1]] + if n := pairedParensPrefixLen(match); n < len(match) { + // match contains unpaired parentheses (rare); + // redo matching with shortened line for correct indices + m = matchRx.FindStringSubmatchIndex(line[:m[0]+n]) + match = match[:n] + } + + // analyze match url := "" italics := false if words != nil { - url, italics = words[string(match)] + url, italics = words[match] } if m[2] >= 0 { // match against first parenthesized sub-regexp; must be match against urlRx if !italics { // no alternative URL in words list, use match instead - url = string(match) + url = match } italics = false // don't italicize URLs } diff --git a/src/pkg/go/doc/comment_test.go b/src/pkg/go/doc/comment_test.go index 9f29a61153..ad65c2a27f 100644 --- a/src/pkg/go/doc/comment_test.go +++ b/src/pkg/go/doc/comment_test.go @@ -148,13 +148,16 @@ func TestToText(t *testing.T) { } var emphasizeTests = []struct { - in string - out string + in, out string }{ {"http://www.google.com/", `http://www.google.com/`}, {"https://www.google.com/", `https://www.google.com/`}, {"http://www.google.com/path.", `http://www.google.com/path.`}, + {"http://en.wikipedia.org/wiki/Camellia_(cipher)", `http://en.wikipedia.org/wiki/Camellia_(cipher)`}, {"(http://www.google.com/)", `(http://www.google.com/)`}, + {"http://gmail.com)", `http://gmail.com)`}, + {"((http://gmail.com))", `((http://gmail.com))`}, + {"http://gmail.com ((http://gmail.com)) ()", `http://gmail.com ((http://gmail.com)) ()`}, {"Foo bar http://example.com/ quux!", `Foo bar http://example.com/ quux!`}, {"Hello http://example.com/%2f/ /world.", `Hello http://example.com/%2f/ /world.`}, {"Lorem http: ipsum //host/path", "Lorem http: ipsum //host/path"}, @@ -171,3 +174,34 @@ func TestEmphasize(t *testing.T) { } } } + +var pairedParensPrefixLenTests = []struct { + in, out string +}{ + {"", ""}, + {"foo", "foo"}, + {"()", "()"}, + {"foo()", "foo()"}, + {"foo()()()", "foo()()()"}, + {"foo()((()()))", "foo()((()()))"}, + {"foo()((()()))bar", "foo()((()()))bar"}, + {"foo)", "foo"}, + {"foo))", "foo"}, + {"foo)))))", "foo"}, + {"(foo", ""}, + {"((foo", ""}, + {"(((((foo", ""}, + {"(foo)", "(foo)"}, + {"((((foo))))", "((((foo))))"}, + {"foo()())", "foo()()"}, + {"foo((()())", "foo"}, + {"foo((()())) (() foo ", "foo((()())) "}, +} + +func TestPairedParensPrefixLen(t *testing.T) { + for i, tt := range pairedParensPrefixLenTests { + if out := tt.in[:pairedParensPrefixLen(tt.in)]; out != tt.out { + t.Errorf("#%d: mismatch\nhave: %q\nwant: %q", i, out, tt.out) + } + } +}