From: Robert Griesemer Date: Fri, 19 Mar 2010 20:01:45 +0000 (-0700) Subject: godoc: improved comment formatting: recognize URLs X-Git-Tag: weekly.2010-03-22~24 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=90f7209548ee73dd1e918a4a03a375e6818081ad;p=gostls13.git godoc: improved comment formatting: recognize URLs and highlight special words, if provided. Also: - related cleanups in src/pkg/go/doc/comment.go - fix typos in src/cmd/goinstall/doc.go Fixes #672. R=rsc CC=adg, golang-dev https://golang.org/cl/601042 --- diff --git a/src/cmd/godoc/godoc.go b/src/cmd/godoc/godoc.go index 8a8cd420ab..4a625311f4 100644 --- a/src/cmd/godoc/godoc.go +++ b/src/cmd/godoc/godoc.go @@ -665,7 +665,9 @@ func htmlEscFmt(w io.Writer, x interface{}, format string) { func htmlCommentFmt(w io.Writer, x interface{}, format string) { var buf bytes.Buffer writeAny(&buf, x, false) - doc.ToHTML(w, buf.Bytes()) // does html-escaping + // TODO(gri) Provide list of words (e.g. function parameters) + // to be emphasized by ToHTML. + doc.ToHTML(w, buf.Bytes(), nil) // does html-escaping } diff --git a/src/cmd/goinstall/doc.go b/src/cmd/goinstall/doc.go index c35e9e043b..c5f93f9290 100644 --- a/src/cmd/goinstall/doc.go +++ b/src/cmd/goinstall/doc.go @@ -6,7 +6,7 @@ Goinstall is an experiment in automatic package installation. It installs packages, possibly downloading them from the internet. -It maintains a list of public Go packages at http://godashboard.appspot.com/packages. +It maintains a list of public Go packages at http://godashboard.appspot.com/package. Usage: goinstall [flags] importpath... diff --git a/src/pkg/go/doc/comment.go b/src/pkg/go/doc/comment.go index 6e9ad0b04c..3fc6396637 100644 --- a/src/pkg/go/doc/comment.go +++ b/src/pkg/go/doc/comment.go @@ -8,12 +8,25 @@ package doc import ( "go/ast" + "http" // for URLEscape "io" + "regexp" "strings" "template" // for htmlEscape ) -// Comment extraction + +func isWhitespace(ch byte) bool { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' } + + +func stripTrailingWhitespace(s string) string { + i := len(s) + for i > 0 && isWhitespace(s[i-1]) { + i-- + } + return s[0:i] +} + // CommentText returns the text of comment, // with the comment markers - //, /*, and */ - removed. @@ -26,19 +39,23 @@ func CommentText(comment *ast.CommentGroup) string { comments[i] = string(c.Text) } - lines := make([]string, 0, 20) + lines := make([]string, 0, 10) // most comments are less than 10 lines for _, c := range comments { // Remove comment markers. // The parser has given us exactly the comment text. - switch n := len(c); { - case n >= 4 && c[0:2] == "/*" && c[n-2:n] == "*/": - c = c[2 : n-2] - case n >= 2 && c[0:2] == "//": - c = c[2:n] + switch c[1] { + case '/': + //-style comment + c = c[2:] // Remove leading space after //, if there is one. + // TODO(gri) This appears to be necessary in isolated + // cases (bignum.RatFromString) - why? if len(c) > 0 && c[0] == ' ' { c = c[1:] } + case '*': + /*-style comment */ + c = c[2 : len(c)-2] } // Split on newlines. @@ -46,20 +63,12 @@ func CommentText(comment *ast.CommentGroup) string { // Walk lines, stripping trailing white space and adding to list. for _, l := range cl { - // Strip trailing white space - m := len(l) - for m > 0 && (l[m-1] == ' ' || l[m-1] == '\n' || l[m-1] == '\t' || l[m-1] == '\r') { - m-- - } - l = l[0:m] - + l = stripTrailingWhitespace(l) // Add to list. n := len(lines) if n+1 >= cap(lines) { newlines := make([]string, n, 2*cap(lines)) - for k := range newlines { - newlines[k] = lines[k] - } + copy(newlines, lines) lines = newlines } lines = lines[0 : n+1] @@ -88,6 +97,7 @@ func CommentText(comment *ast.CommentGroup) string { return strings.Join(lines, "\n") } + // Split bytes into lines. func split(text []byte) [][]byte { // count lines @@ -127,28 +137,51 @@ var ( rdquo = []byte("”") ) -// Escape comment text for HTML. -// Also, turn `` into “ and '' into ”. -func commentEscape(w io.Writer, s []byte) { +// Escape comment text for HTML. If nice is set, +// also turn `` into “ and '' into ”. +func commentEscape(w io.Writer, s []byte, nice bool) { last := 0 - for i := 0; i < len(s)-1; i++ { - if s[i] == s[i+1] && (s[i] == '`' || s[i] == '\'') { - template.HTMLEscape(w, s[last:i]) - last = i + 2 - switch s[i] { - case '`': - w.Write(ldquo) - case '\'': - w.Write(rdquo) + if nice { + for i := 0; i < len(s)-1; i++ { + ch := s[i] + if ch == s[i+1] && (ch == '`' || ch == '\'') { + template.HTMLEscape(w, s[last:i]) + last = i + 2 + switch ch { + case '`': + w.Write(ldquo) + case '\'': + w.Write(rdquo) + } + i++ // loop will add one more } - i++ // loop will add one more } } template.HTMLEscape(w, s[last:]) } +const ( + // Regexp for Go identifiers + identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this + + // Regexp for URLs + protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):` + hostPart = `[a-zA-Z0-9_@\-]+` + filePart = `[a-zA-Z0-9_?%#~&/\-+=]+` + urlRx = protocol + `//` + // http:// + hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/ + filePart + `([:.,]` + filePart + `)*` +) + +var matchRx = regexp.MustCompile(`(` + identRx + `)|(` + urlRx + `)`) + var ( + html_a = []byte(``) + html_enda = []byte("") + html_i = []byte("") + html_endi = []byte("") html_p = []byte("

\n") html_endp = []byte("

\n") html_pre = []byte("
")
@@ -156,6 +189,66 @@ var (
 )
 
 
+// Emphasize and escape a line of text for HTML. URLs are converted into links;
+// if the URL also appears in the words map, the link is taken from the map (if
+// the corresponding map value is the empty string, the URL is not converted
+// into a link). Go identifiers that appear in the words map are italicized; if
+// the corresponding map value is not the empty string, it is considered a URL
+// and the word is converted into a link. If nice is set, the remaining text's
+// appearance is improved where is makes sense (e.g., `` is turned into “
+// and '' into ”).
+func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) {
+	for {
+		m := matchRx.Execute(line)
+		if len(m) == 0 {
+			break
+		}
+		// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is identRx)
+
+		// write text before match
+		commentEscape(w, line[0:m[0]], nice)
+
+		// analyze match
+		match := line[m[0]:m[1]]
+		url := ""
+		italics := false
+		if words != nil {
+			url, italics = words[string(match)]
+		}
+		if m[2] < 0 {
+			// didn't match against first parenthesized sub-regexp; must be match against urlRx
+			if !italics {
+				// no alternative URL in words list, use match instead
+				url = string(match)
+			}
+			italics = false // don't italicize URLs
+		}
+
+
+		// write match
+		if len(url) > 0 {
+			w.Write(html_a)
+			w.Write([]byte(http.URLEscape(url)))
+			w.Write(html_aq)
+		}
+		if italics {
+			w.Write(html_i)
+		}
+		commentEscape(w, match, nice)
+		if italics {
+			w.Write(html_endi)
+		}
+		if len(url) > 0 {
+			w.Write(html_enda)
+		}
+
+		// advance
+		line = line[m[1]:]
+	}
+	commentEscape(w, line, nice)
+}
+
+
 func indentLen(s []byte) int {
 	i := 0
 	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
@@ -207,11 +300,16 @@ func unindent(block [][]byte) {
 // The comment markers have already been removed.
 //
 // Turn each run of multiple \n into 

-// Turn each run of indented lines into

 without indent.
+// Turn each run of indented lines into a 
 block without indent.
+//
+// URLs in the comment text are converted into links; if the URL also appears
+// in the words map, the link is taken from the map (if the corresponding map
+// value is the empty string, the URL is not converted into a link).
 //
-// TODO(rsc): I'd like to pass in an array of variable names []string
-// and then italicize those strings when they appear as words.
-func ToHTML(w io.Writer, s []byte) {
+// Go identifiers that appear in the words map are italicized; if the corresponding
+// map value is not the empty string, it is considered a URL and the word is converted
+// into a link.
+func ToHTML(w io.Writer, s []byte, words map[string]string) {
 	inpara := false
 
 	close := func() {
@@ -255,19 +353,17 @@ func ToHTML(w io.Writer, s []byte) {
 
 			unindent(block)
 
-			// put those lines in a pre block.
-			// they don't get the nice text formatting,
-			// just html escaping
+			// put those lines in a pre block
 			w.Write(html_pre)
 			for _, line := range block {
-				template.HTMLEscape(w, line)
+				emphasize(w, line, nil, false) // no nice text formatting
 			}
 			w.Write(html_endpre)
 			continue
 		}
 		// open paragraph
 		open()
-		commentEscape(w, lines[i])
+		emphasize(w, lines[i], words, true) // nice text formatting
 		i++
 	}
 	close()