From: Robert Griesemer Date: Sun, 12 Dec 2010 21:13:07 +0000 (-0800) Subject: godoc: improved textual search X-Git-Tag: weekly.2010-12-15~53 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=5e54e9367566e9ce9334aae1e5ad76ac31673c29;p=gostls13.git godoc: improved textual search - improved search result page - clicking on files shows highlighted search phrase (.go files loose their godoc-specific formatting and highlighting in this mode - a better solution is in the works) - support for textual results - fixed bug with non-URL escaped URL parameter (Query) R=rsc, adg CC=golang-dev https://golang.org/cl/3585041 --- diff --git a/lib/godoc/search.html b/lib/godoc/search.html index 29ea41cc92..92cd4faf89 100644 --- a/lib/godoc/search.html +++ b/lib/godoc/search.html @@ -7,7 +7,7 @@ {.section Accurate} {.or}

- Indexing in progress - result may be inaccurate + Indexing in progress - result may be inaccurate.

{.end} {.section Alt} @@ -26,7 +26,7 @@ {.repeated section Files} {.repeated section Groups} {.repeated section Infos} - {File.Path|url-src}:{@|infoLine} + {File.Path|url-src}:{@|infoLine}
{@|infoSnippet}
{.end} {.end} @@ -38,7 +38,7 @@ {.repeated section @}

package {Pak.Name|html-esc}

{.repeated section Files} - {File.Path|url-src} + {File.Path|url-src} {.repeated section Groups} @@ -47,7 +47,7 @@ @@ -57,36 +57,32 @@ {.end} {.end} {.end} -{.section Illegal} -

- Illegal query syntax -

-

- A legal query is a single identifier (such as ToLower) - or a qualified identifier (such as math.Sin). -

-{.end} {.section Textual} -

Textual occurences

+ {.section Complete} +

{Found|html-esc} textual occurences

+ {.or} +

More than {Found|html-esc} textual occurences

+

+ Not all files or lines containing {Query|html-esc} are shown. +

+ {.end} +

{.repeated section Infos} - {@|infoLine} + {@|infoLine} {.end}
- - - - - {.repeated section @} - - {Lines|linelist} + + + + {.end} + {.section Complete} + {.or} + + {.end}
FileOccurencesLines
- {Filename|url-src}: + + {Filename|url-src}: {Lines|numlines}{Lines Complete|linelist}
...
-{.end} -{.section Complete} -{.or} -

- Incomplete list of results

{.end} diff --git a/lib/godoc/search.txt b/lib/godoc/search.txt index 90266292c4..0c949f9d1d 100644 --- a/lib/godoc/search.txt +++ b/lib/godoc/search.txt @@ -1,5 +1,5 @@ QUERY -{Query} + {Query} {.section Accurate} {.or} @@ -45,9 +45,18 @@ package {Pak.Name} {.end} {.end} {.end} -{.section Illegal} -ILLEGAL QUERY SYNTAX +{.section Textual} +{.section Complete} +{Found} TEXTUAL OCCURENCES +{.or} +MORE THAN {Found} TEXTUAL OCCURENCES +{.end} -A legal query is a single identifier (such as ToLower) -or a qualified identifier (such as math.Sin). +{.repeated section @} +{Lines|numlines} {Filename|url-src} +{.end} +{.section Complete} +{.or} +... ... +{.end} {.end} diff --git a/src/cmd/godoc/godoc.go b/src/cmd/godoc/godoc.go index 5dced1498b..d8c1dfc70a 100644 --- a/src/cmd/godoc/godoc.go +++ b/src/cmd/godoc/godoc.go @@ -361,7 +361,7 @@ func writeObjInfo(w io.Writer, fset *token.FileSet, obj *ast.Object) { // for 0 <= i < s.idcount. func (s *Styler) idList(fset *token.FileSet) []byte { var buf bytes.Buffer - fmt.Fprintln(&buf, "[") + buf.WriteString("[\n") if s.idcount > 0 { // invert objmap: create an array [id]obj from map[obj]id @@ -382,7 +382,7 @@ func (s *Styler) idList(fset *token.FileSet) []byte { } } - fmt.Fprintln(&buf, "]") + buf.WriteString("]\n") return buf.Bytes() } @@ -600,7 +600,15 @@ func textFmt(w io.Writer, format string, x ...interface{}) { } -// Template formatter for the various "url-xxx" formats. +// Template formatter for "urlquery-esc" format. +func urlQueryEscFmt(w io.Writer, format string, x ...interface{}) { + var buf bytes.Buffer + writeAny(&buf, fileset(x), false, x[0]) + template.HTMLEscape(w, []byte(http.URLEscape(string(buf.Bytes())))) +} + + +// Template formatter for the various "url-xxx" formats excluding url-esc. func urlFmt(w io.Writer, format string, x ...interface{}) { var path string var line int @@ -737,21 +745,30 @@ func localnameFmt(w io.Writer, format string, x ...interface{}) { } +// Template formatter for "numlines" format. +func numlinesFmt(w io.Writer, format string, x ...interface{}) { + list := x[0].([]int) + fmt.Fprintf(w, "%d", len(list)) +} + + // Template formatter for "linelist" format. func linelistFmt(w io.Writer, format string, x ...interface{}) { - const max = 20 // show at most this many lines list := x[0].([]int) - // print number of occurences - fmt.Fprintf(w, "%d", len(list)) - // print actual lines - // TODO(gri) should sort them - for i, line := range list { - if i < max { - fmt.Fprintf(w, "%d", line) - } else { - fmt.Fprint(w, "...") - break - } + complete := x[1].(bool) + + const max = 100 // show at most this many lines + if len(list) > max { + list = list[0:max] + complete = false + } + sort.SortInts(list) + + for _, line := range list { + fmt.Fprintf(w, " %d", line) + } + if !complete { + fmt.Fprintf(w, " ...") } } @@ -761,6 +778,7 @@ var fmap = template.FormatterMap{ "html": htmlFmt, "html-esc": htmlEscFmt, "html-comment": htmlCommentFmt, + "urlquery-esc": urlQueryEscFmt, "url-pkg": urlFmt, "url-src": urlFmt, "url-pos": urlFmt, @@ -771,6 +789,7 @@ var fmap = template.FormatterMap{ "time": timeFmt, "dir/": dirslashFmt, "localname": localnameFmt, + "numlines": numlinesFmt, "linelist": linelistFmt, } @@ -998,6 +1017,35 @@ func isTextFile(path string) bool { } +// HTMLSubst replaces all occurences of f in s with r and HTML-escapes +// everything else in s (but not r). The result is written to w. +// +func HTMLSubst(w io.Writer, s, f, r []byte) { + for { + i := bytes.Index(s, f) + if i < 0 { + break + } + template.HTMLEscape(w, s[0:i]) + w.Write(r) + s = s[i+len(f):] + } + template.HTMLEscape(w, s) +} + + +// highlight highlights all occurrences of h in s and writes the +// HTML-escaped result to w. +// +func highlight(w io.Writer, s, h []byte) { + var r bytes.Buffer + r.WriteString(``) + template.HTMLEscape(&r, h) + r.WriteString(``) + HTMLSubst(w, s, h, r.Bytes()) +} + + func serveTextFile(w http.ResponseWriter, r *http.Request, abspath, relpath string) { src, err := ioutil.ReadFile(abspath) if err != nil { @@ -1007,9 +1055,14 @@ func serveTextFile(w http.ResponseWriter, r *http.Request, abspath, relpath stri } var buf bytes.Buffer - fmt.Fprintln(&buf, "
")
-	template.HTMLEscape(&buf, src)
-	fmt.Fprintln(&buf, "
") + buf.WriteString("
\n")
+	g := r.FormValue("g")
+	if g != "" {
+		highlight(&buf, src, []byte(g))
+	} else {
+		template.HTMLEscape(&buf, src)
+	}
+	buf.WriteString("
\n") servePage(w, "Text file "+relpath, "", "", buf.Bytes()) } @@ -1066,6 +1119,10 @@ func serveFile(w http.ResponseWriter, r *http.Request) { return case ".go": + if r.FormValue("g") != "" { + serveTextFile(w, r, abspath, relpath) + return + } serveGoSource(w, r, abspath, relpath) return } @@ -1332,7 +1389,7 @@ type SearchResult struct { Query string Hit *LookupResult // identifier occurences of Query Alt *AltWords // alternative identifiers to look for - Illegal bool // true if Query for identifier search has incorrect syntax + Found int // number of textual occurences found Textual []Positions // textual occurences of Query Complete bool // true if all textual occurences of Query are reported Accurate bool // true if the index is not older than the indexed files @@ -1343,10 +1400,10 @@ func lookup(query string) (result SearchResult) { result.Query = query if index, timestamp := searchIndex.get(); index != nil { index := index.(*Index) - result.Hit, result.Alt, result.Illegal = index.Lookup(query) + result.Hit, result.Alt, _ = index.Lookup(query) // TODO(gri) should max be a flag? const max = 5000 // show at most this many fulltext results - result.Textual, result.Complete = index.LookupString(query, max) + result.Found, result.Textual, result.Complete = index.LookupString(query, max) _, ts := fsModified.get() result.Accurate = timestamp >= ts } @@ -1440,8 +1497,8 @@ func indexer() { if *verbose { secs := float64((stop-start)/1e6) / 1e3 stats := index.Stats() - log.Printf("index updated (%gs, %d bytes of source, %d files, %d unique words, %d spots)", - secs, stats.Bytes, stats.Files, stats.Words, stats.Spots) + log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)", + secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots) } log.Printf("before GC: bytes = %d footprint = %d\n", runtime.MemStats.HeapAlloc, runtime.MemStats.Sys) runtime.GC() diff --git a/src/cmd/godoc/index.go b/src/cmd/godoc/index.go index ff51a278e4..906c8c9a42 100644 --- a/src/cmd/godoc/index.go +++ b/src/cmd/godoc/index.go @@ -443,6 +443,7 @@ type IndexResult struct { type Statistics struct { Bytes int // total size of indexed source files Files int // number of indexed source files + Lines int // number of lines (all files) Words int // number of different identifiers Spots int // number of identifier occurences } @@ -699,6 +700,7 @@ func (x *Indexer) visitFile(dirname string, f *os.FileInfo) { // (count real file size as opposed to using the padded x.sources.Len()) x.stats.Bytes += x.current.Size() x.stats.Files++ + x.stats.Lines += x.current.LineCount() } @@ -891,13 +893,13 @@ type Positions struct { } -// LookupString returns a list of positions where a string s is found -// in the full text index and whether the result is complete or not. -// At most n positions (filename and line) are returned. The result is -// not complete if the index is not present or there are more than n -// occurrences of s. +// LookupString returns the number and list of positions where a string +// s is found in the full text index and whether the result is complete +// or not. At most n positions (filename and line) are returned (and thus +// found <= n). The result is incomplete if the index is not present or +// if there are more than n occurrences of s. // -func (x *Index) LookupString(s string, n int) (result []Positions, complete bool) { +func (x *Index) LookupString(s string, n int) (found int, result []Positions, complete bool) { if x.suffixes == nil { return } @@ -908,6 +910,7 @@ func (x *Index) LookupString(s string, n int) (result []Positions, complete bool } else { offsets = offsets[0:n] } + found = len(offsets) // compute file names and lines and sort the list by filename list := make(positionList, len(offsets))