godoc index: first step towards reducing index size

author Robert Griesemer <gri@golang.org>

Wed, 31 Aug 2011 01:47:15 +0000 (18:47 -0700)

committer Robert Griesemer <gri@golang.org>

Wed, 31 Aug 2011 01:47:15 +0000 (18:47 -0700)
author Robert Griesemer <gri@golang.org>
Wed, 31 Aug 2011 01:47:15 +0000 (18:47 -0700)
committer Robert Griesemer <gri@golang.org>
Wed, 31 Aug 2011 01:47:15 +0000 (18:47 -0700)
diff --git a/src/cmd/godoc/index.go b/src/cmd/godoc/index.go

index 8bf1a9eb3804fb4ab5af00a99cbc96d338663ddb..28ef26fc12f1a96be42630e9b4a42877636237da 100644 (file)
--- a/src/cmd/godoc/index.go
+++ b/src/cmd/godoc/index.go
@@ -242,8 +242,13 @@ func (p *Pak) less(q *Pak) bool {
  
  // A File describes a Go file.
  type File struct {
-       Path string // complete file name
-       Pak  Pak    // the package to which the file belongs
+       Name string // directory-local file name
+       Pak  *Pak   // the package to which the file belongs
+}
+
+// Path returns the file path of f.
+func (f *File) Path() string {
+       return filepath.Join(f.Pak.Path, f.Name)
  }
  
  // A Spot describes a single occurrence of a word.
@@ -258,8 +263,15 @@ type FileRun struct {
         Groups []*KindRun
  }
  
-// Spots are sorted by path for the reduction into FileRuns.
-func lessSpot(x, y interface{}) bool { return x.(Spot).File.Path < y.(Spot).File.Path }
+// Spots are sorted by file path for the reduction into FileRuns.
+func lessSpot(x, y interface{}) bool {
+       fx := x.(Spot).File
+       fy := y.(Spot).File
+       // same as "return fx.Path() < fy.Path()" but w/o computing the file path first
+       px := fx.Pak.Path
+       py := fy.Pak.Path
+       return px < py || px == py && fx.Name < fy.Name
+}
  
  // newFileRun allocates a new FileRun from the Spot run h.
  func newFileRun(h RunList) interface{} {
@@ -285,18 +297,18 @@ func newFileRun(h RunList) interface{} {
  
  // A PakRun describes a run of *FileRuns of a package.
  type PakRun struct {
-       Pak   Pak
+       Pak   *Pak
         Files []*FileRun
  }
  
  // Sorting support for files within a PakRun.
  func (p *PakRun) Len() int           { return len(p.Files) }
-func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Path < p.Files[j].File.Path }
+func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name }
  func (p *PakRun) Swap(i, j int)      { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] }
  
  // FileRuns are sorted by package for the reduction into PakRuns.
  func lessFileRun(x, y interface{}) bool {
-       return x.(*FileRun).File.Pak.less(&y.(*FileRun).File.Pak)
+       return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak)
  }
  
  // newPakRun allocates a new PakRun from the *FileRun run h.
@@ -318,7 +330,7 @@ func newPakRun(h RunList) interface{} {
  type HitList []*PakRun
  
  // PakRuns are sorted by package.
-func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(&y.(*PakRun).Pak) }
+func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) }
  
  func reduce(h0 RunList) HitList {
         // reduce a list of Spots into a list of FileRuns
@@ -414,6 +426,7 @@ type Statistics struct {
  type Indexer struct {
         fset     *token.FileSet          // file set for all indexed files
         sources  bytes.Buffer            // concatenated sources
+       packages map[string]*Pak         // map of canonicalized *Paks
         words    map[string]*IndexResult // RunLists of Spots
         snippets []*Snippet              // indices are stored in SpotInfos
         current  *token.File             // last file added to file set
@@ -422,6 +435,20 @@ type Indexer struct {
         stats    Statistics
  }
  
+func (x *Indexer) lookupPackage(path, name string) *Pak {
+       // In the source directory tree, more than one package may
+       // live in the same directory. For the packages map, construct
+       // a key that includes both the directory path and the package
+       // name.
+       key := path + ":" + name
+       pak := x.packages[key]
+       if pak == nil {
+               pak = &Pak{path, name}
+               x.packages[key] = pak
+       }
+       return pak
+}
+
  func (x *Indexer) addSnippet(s *Snippet) int {
         index := len(x.snippets)
         x.snippets = append(x.snippets, s)
@@ -704,9 +731,8 @@ func (x *Indexer) visitFile(dirname string, f FileInfo, fulltextIndex bool) {
         if fast != nil {
                 // we've got a Go file to index
                 x.current = file
-               dir, _ := filepath.Split(filename)
-               pak := Pak{dir, fast.Name.Name}
-               x.file = &File{filename, pak}
+               pak := x.lookupPackage(dirname, fast.Name.Name)
+               x.file = &File{f.Name(), pak}
                 ast.Walk(x, fast)
         }
  
@@ -743,8 +769,10 @@ func NewIndex(dirnames <-chan string, fulltextIndex bool, throttle float64) *Ind
         th := NewThrottle(throttle, 0.1e9) // run at least 0.1s at a time
  
         // initialize Indexer
+       // (use some reasonably sized maps to start)
         x.fset = token.NewFileSet()
-       x.words = make(map[string]*IndexResult)
+       x.packages = make(map[string]*Pak, 256)
+       x.words = make(map[string]*IndexResult, 8192)
  
         // index all files in the directories given by dirnames
         for dirname := range dirnames {
author	Robert Griesemer <gri@golang.org>
	Wed, 31 Aug 2011 01:47:15 +0000 (18:47 -0700)
committer	Robert Griesemer <gri@golang.org>
	Wed, 31 Aug 2011 01:47:15 +0000 (18:47 -0700)