const (
// Regexp for Go identifiers
- identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this
+ identRx = `[\pL_][\pL_0-9]*`
// Regexp for URLs
- protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):`
+ protocol = `https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero`
hostPart = `[a-zA-Z0-9_@\-]+`
- filePart = `[a-zA-Z0-9_?%#~&/\-+=]+`
- urlRx = protocol + `//` + // http://
+ filePart = `[a-zA-Z0-9_?%#~&/\-+=()]+` // parentheses may not be matching; see pairedParensPrefixLen
+ urlRx = `(` + protocol + `)://` + // http://
hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
filePart + `([:.,]` + filePart + `)*`
)
html_endh = []byte("</h3>\n")
)
+// pairedParensPrefixLen returns the length of the longest prefix of s containing paired parentheses.
+func pairedParensPrefixLen(s string) int {
+ parens := 0
+ l := len(s)
+ for i, ch := range s {
+ switch ch {
+ case '(':
+ if parens == 0 {
+ l = i
+ }
+ parens++
+ case ')':
+ parens--
+ if parens == 0 {
+ l = len(s)
+ } else if parens < 0 {
+ return i
+ }
+ }
+ }
+ return l
+}
+
// Emphasize and escape a line of text for HTML. URLs are converted into links;
// if the URL also appears in the words map, the link is taken from the map (if
// the corresponding map value is the empty string, the URL is not converted
// write text before match
commentEscape(w, line[0:m[0]], nice)
- // analyze match
+ // adjust match if necessary
match := line[m[0]:m[1]]
+ if n := pairedParensPrefixLen(match); n < len(match) {
+ // match contains unpaired parentheses (rare);
+ // redo matching with shortened line for correct indices
+ m = matchRx.FindStringSubmatchIndex(line[:m[0]+n])
+ match = match[:n]
+ }
+
+ // analyze match
url := ""
italics := false
if words != nil {
- url, italics = words[string(match)]
+ url, italics = words[match]
}
if m[2] >= 0 {
// match against first parenthesized sub-regexp; must be match against urlRx
if !italics {
// no alternative URL in words list, use match instead
- url = string(match)
+ url = match
}
italics = false // don't italicize URLs
}
}
var emphasizeTests = []struct {
- in string
- out string
+ in, out string
}{
{"http://www.google.com/", `<a href="http://www.google.com/">http://www.google.com/</a>`},
{"https://www.google.com/", `<a href="https://www.google.com/">https://www.google.com/</a>`},
{"http://www.google.com/path.", `<a href="http://www.google.com/path">http://www.google.com/path</a>.`},
+ {"http://en.wikipedia.org/wiki/Camellia_(cipher)", `<a href="http://en.wikipedia.org/wiki/Camellia_(cipher)">http://en.wikipedia.org/wiki/Camellia_(cipher)</a>`},
{"(http://www.google.com/)", `(<a href="http://www.google.com/">http://www.google.com/</a>)`},
+ {"http://gmail.com)", `<a href="http://gmail.com">http://gmail.com</a>)`},
+ {"((http://gmail.com))", `((<a href="http://gmail.com">http://gmail.com</a>))`},
+ {"http://gmail.com ((http://gmail.com)) ()", `<a href="http://gmail.com">http://gmail.com</a> ((<a href="http://gmail.com">http://gmail.com</a>)) ()`},
{"Foo bar http://example.com/ quux!", `Foo bar <a href="http://example.com/">http://example.com/</a> quux!`},
{"Hello http://example.com/%2f/ /world.", `Hello <a href="http://example.com/%2f/">http://example.com/%2f/</a> /world.`},
{"Lorem http: ipsum //host/path", "Lorem http: ipsum //host/path"},
}
}
}
+
+var pairedParensPrefixLenTests = []struct {
+ in, out string
+}{
+ {"", ""},
+ {"foo", "foo"},
+ {"()", "()"},
+ {"foo()", "foo()"},
+ {"foo()()()", "foo()()()"},
+ {"foo()((()()))", "foo()((()()))"},
+ {"foo()((()()))bar", "foo()((()()))bar"},
+ {"foo)", "foo"},
+ {"foo))", "foo"},
+ {"foo)))))", "foo"},
+ {"(foo", ""},
+ {"((foo", ""},
+ {"(((((foo", ""},
+ {"(foo)", "(foo)"},
+ {"((((foo))))", "((((foo))))"},
+ {"foo()())", "foo()()"},
+ {"foo((()())", "foo"},
+ {"foo((()())) (() foo ", "foo((()())) "},
+}
+
+func TestPairedParensPrefixLen(t *testing.T) {
+ for i, tt := range pairedParensPrefixLenTests {
+ if out := tt.in[:pairedParensPrefixLen(tt.in)]; out != tt.out {
+ t.Errorf("#%d: mismatch\nhave: %q\nwant: %q", i, out, tt.out)
+ }
+ }
+}