godoc: improved comment formatting: recognize URLs

author Robert Griesemer <gri@golang.org>

Fri, 19 Mar 2010 20:01:45 +0000 (13:01 -0700)

committer Robert Griesemer <gri@golang.org>

Fri, 19 Mar 2010 20:01:45 +0000 (13:01 -0700)
author Robert Griesemer <gri@golang.org>
Fri, 19 Mar 2010 20:01:45 +0000 (13:01 -0700)
committer Robert Griesemer <gri@golang.org>
Fri, 19 Mar 2010 20:01:45 +0000 (13:01 -0700)
diff --git a/src/cmd/godoc/godoc.go b/src/cmd/godoc/godoc.go

index 8a8cd420ab553d9455b07ae3d974bf9f742a29bf..4a625311f4d187b6abb15545e190dad5bf632a47 100644 (file)
--- a/src/cmd/godoc/godoc.go
+++ b/src/cmd/godoc/godoc.go
@@ -665,7 +665,9 @@ func htmlEscFmt(w io.Writer, x interface{}, format string) {
  func htmlCommentFmt(w io.Writer, x interface{}, format string) {
         var buf bytes.Buffer
         writeAny(&buf, x, false)
-       doc.ToHTML(w, buf.Bytes()) // does html-escaping
+       // TODO(gri) Provide list of words (e.g. function parameters)
+       //           to be emphasized by ToHTML.
+       doc.ToHTML(w, buf.Bytes(), nil) // does html-escaping
  }
  
  
diff --git a/src/cmd/goinstall/doc.go b/src/cmd/goinstall/doc.go

index c35e9e043b775464e244bcd46d866e1467779bf9..c5f93f9290dc76fe9052c7d839b0d40e556d119f 100644 (file)
--- a/src/cmd/goinstall/doc.go
+++ b/src/cmd/goinstall/doc.go
@@ -6,7 +6,7 @@
  
  Goinstall is an experiment in automatic package installation.
  It installs packages, possibly downloading them from the internet.
-It maintains a list of public Go packages at http://godashboard.appspot.com/packages.
+It maintains a list of public Go packages at http://godashboard.appspot.com/package.
  
  Usage:
         goinstall [flags] importpath...
diff --git a/src/pkg/go/doc/comment.go b/src/pkg/go/doc/comment.go

index 6e9ad0b04cf87bdfaf441028a28a402e79a6bad3..3fc63966373c6b5cddbdf568dca8510ed0f6ed57 100644 (file)
--- a/src/pkg/go/doc/comment.go
+++ b/src/pkg/go/doc/comment.go
@@ -8,12 +8,25 @@ package doc
  
  import (
         "go/ast"
+       "http" // for URLEscape
         "io"
+       "regexp"
         "strings"
         "template" // for htmlEscape
  )
  
-// Comment extraction
+
+func isWhitespace(ch byte) bool { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' }
+
+
+func stripTrailingWhitespace(s string) string {
+       i := len(s)
+       for i > 0 && isWhitespace(s[i-1]) {
+               i--
+       }
+       return s[0:i]
+}
+
  
  // CommentText returns the text of comment,
  // with the comment markers - //, /*, and */ - removed.
@@ -26,19 +39,23 @@ func CommentText(comment *ast.CommentGroup) string {
                 comments[i] = string(c.Text)
         }
  
-       lines := make([]string, 0, 20)
+       lines := make([]string, 0, 10) // most comments are less than 10 lines
         for _, c := range comments {
                 // Remove comment markers.
                 // The parser has given us exactly the comment text.
-               switch n := len(c); {
-               case n >= 4 && c[0:2] == "/*" && c[n-2:n] == "*/":
-                       c = c[2 : n-2]
-               case n >= 2 && c[0:2] == "//":
-                       c = c[2:n]
+               switch c[1] {
+               case '/':
+                       //-style comment
+                       c = c[2:]
                         // Remove leading space after //, if there is one.
+                       // TODO(gri) This appears to be necessary in isolated
+                       //           cases (bignum.RatFromString) - why?
                         if len(c) > 0 && c[0] == ' ' {
                                 c = c[1:]
                         }
+               case '*':
+                       /*-style comment */
+                       c = c[2 : len(c)-2]
                 }
  
                 // Split on newlines.
@@ -46,20 +63,12 @@ func CommentText(comment *ast.CommentGroup) string {
  
                 // Walk lines, stripping trailing white space and adding to list.
                 for _, l := range cl {
-                       // Strip trailing white space
-                       m := len(l)
-                       for m > 0 && (l[m-1] == ' ' || l[m-1] == '\n' || l[m-1] == '\t' || l[m-1] == '\r') {
-                               m--
-                       }
-                       l = l[0:m]
-
+                       l = stripTrailingWhitespace(l)
                         // Add to list.
                         n := len(lines)
                         if n+1 >= cap(lines) {
                                 newlines := make([]string, n, 2*cap(lines))
-                               for k := range newlines {
-                                       newlines[k] = lines[k]
-                               }
+                               copy(newlines, lines)
                                 lines = newlines
                         }
                         lines = lines[0 : n+1]
@@ -88,6 +97,7 @@ func CommentText(comment *ast.CommentGroup) string {
         return strings.Join(lines, "\n")
  }
  
+
  // Split bytes into lines.
  func split(text []byte) [][]byte {
         // count lines
@@ -127,28 +137,51 @@ var (
         rdquo = []byte("&rdquo;")
  )
  
-// Escape comment text for HTML.
-// Also, turn `` into &ldquo; and '' into &rdquo;.
-func commentEscape(w io.Writer, s []byte) {
+// Escape comment text for HTML. If nice is set,
+// also turn `` into &ldquo; and '' into &rdquo;.
+func commentEscape(w io.Writer, s []byte, nice bool) {
         last := 0
-       for i := 0; i < len(s)-1; i++ {
-               if s[i] == s[i+1] && (s[i] == '`' || s[i] == '\'') {
-                       template.HTMLEscape(w, s[last:i])
-                       last = i + 2
-                       switch s[i] {
-                       case '`':
-                               w.Write(ldquo)
-                       case '\'':
-                               w.Write(rdquo)
+       if nice {
+               for i := 0; i < len(s)-1; i++ {
+                       ch := s[i]
+                       if ch == s[i+1] && (ch == '`' || ch == '\'') {
+                               template.HTMLEscape(w, s[last:i])
+                               last = i + 2
+                               switch ch {
+                               case '`':
+                                       w.Write(ldquo)
+                               case '\'':
+                                       w.Write(rdquo)
+                               }
+                               i++ // loop will add one more
                         }
-                       i++ // loop will add one more
                 }
         }
         template.HTMLEscape(w, s[last:])
  }
  
  
+const (
+       // Regexp for Go identifiers
+       identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this
+
+       // Regexp for URLs
+       protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):`
+       hostPart = `[a-zA-Z0-9_@\-]+`
+       filePart = `[a-zA-Z0-9_?%#~&/\-+=]+`
+       urlRx    = protocol + `//` + // http://
+               hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
+               filePart + `([:.,]` + filePart + `)*`
+)
+
+var matchRx = regexp.MustCompile(`(` + identRx + `)|(` + urlRx + `)`)
+
  var (
+       html_a      = []byte(`<a href="`)
+       html_aq     = []byte(`">`)
+       html_enda   = []byte("</a>")
+       html_i      = []byte("<i>")
+       html_endi   = []byte("</i>")
         html_p      = []byte("<p>\n")
         html_endp   = []byte("</p>\n")
         html_pre    = []byte("<pre>")
@@ -156,6 +189,66 @@ var (
  )
  
  
+// Emphasize and escape a line of text for HTML. URLs are converted into links;
+// if the URL also appears in the words map, the link is taken from the map (if
+// the corresponding map value is the empty string, the URL is not converted
+// into a link). Go identifiers that appear in the words map are italicized; if
+// the corresponding map value is not the empty string, it is considered a URL
+// and the word is converted into a link. If nice is set, the remaining text's
+// appearance is improved where is makes sense (e.g., `` is turned into &ldquo;
+// and '' into &rdquo;).
+func emphasize(w io.Writer, line []byte, words map[string]string, nice bool) {
+       for {
+               m := matchRx.Execute(line)
+               if len(m) == 0 {
+                       break
+               }
+               // m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is identRx)
+
+               // write text before match
+               commentEscape(w, line[0:m[0]], nice)
+
+               // analyze match
+               match := line[m[0]:m[1]]
+               url := ""
+               italics := false
+               if words != nil {
+                       url, italics = words[string(match)]
+               }
+               if m[2] < 0 {
+                       // didn't match against first parenthesized sub-regexp; must be match against urlRx
+                       if !italics {
+                               // no alternative URL in words list, use match instead
+                               url = string(match)
+                       }
+                       italics = false // don't italicize URLs
+               }
+
+
+               // write match
+               if len(url) > 0 {
+                       w.Write(html_a)
+                       w.Write([]byte(http.URLEscape(url)))
+                       w.Write(html_aq)
+               }
+               if italics {
+                       w.Write(html_i)
+               }
+               commentEscape(w, match, nice)
+               if italics {
+                       w.Write(html_endi)
+               }
+               if len(url) > 0 {
+                       w.Write(html_enda)
+               }
+
+               // advance
+               line = line[m[1]:]
+       }
+       commentEscape(w, line, nice)
+}
+
+
  func indentLen(s []byte) int {
         i := 0
         for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
@@ -207,11 +300,16 @@ func unindent(block [][]byte) {
  // The comment markers have already been removed.
  //
  // Turn each run of multiple \n into </p><p>
-// Turn each run of indented lines into <pre> without indent.
+// Turn each run of indented lines into a <pre> block without indent.
+//
+// URLs in the comment text are converted into links; if the URL also appears
+// in the words map, the link is taken from the map (if the corresponding map
+// value is the empty string, the URL is not converted into a link).
  //
-// TODO(rsc): I'd like to pass in an array of variable names []string
-// and then italicize those strings when they appear as words.
-func ToHTML(w io.Writer, s []byte) {
+// Go identifiers that appear in the words map are italicized; if the corresponding
+// map value is not the empty string, it is considered a URL and the word is converted
+// into a link.
+func ToHTML(w io.Writer, s []byte, words map[string]string) {
         inpara := false
  
         close := func() {
@@ -255,19 +353,17 @@ func ToHTML(w io.Writer, s []byte) {
  
                         unindent(block)
  
-                       // put those lines in a pre block.
-                       // they don't get the nice text formatting,
-                       // just html escaping
+                       // put those lines in a pre block
                         w.Write(html_pre)
                         for _, line := range block {
-                               template.HTMLEscape(w, line)
+                               emphasize(w, line, nil, false) // no nice text formatting
                         }
                         w.Write(html_endpre)
                         continue
                 }
                 // open paragraph
                 open()
-               commentEscape(w, lines[i])
+               emphasize(w, lines[i], words, true) // nice text formatting
                 i++
         }
         close()
author	Robert Griesemer <gri@golang.org>
	Fri, 19 Mar 2010 20:01:45 +0000 (13:01 -0700)
committer	Robert Griesemer <gri@golang.org>
	Fri, 19 Mar 2010 20:01:45 +0000 (13:01 -0700)
src/cmd/godoc/godoc.go		patch \| blob \| history
src/cmd/goinstall/doc.go		patch \| blob \| history
src/pkg/go/doc/comment.go		patch \| blob \| history