go/doc/comment: add paragraph parsing and test framework

author Russ Cox <rsc@golang.org>

Sun, 3 Apr 2022 12:15:40 +0000 (08:15 -0400)

committer Russ Cox <rsc@golang.org>

Mon, 11 Apr 2022 16:31:37 +0000 (16:31 +0000)
author Russ Cox <rsc@golang.org>
Sun, 3 Apr 2022 12:15:40 +0000 (08:15 -0400)
committer Russ Cox <rsc@golang.org>
Mon, 11 Apr 2022 16:31:37 +0000 (16:31 +0000)
diff --git a/api/next/51082.txt b/api/next/51082.txt

index 2127d2ee24aaea186fbf162b5c16eff7705a8913..2cafd4b5337be6f4caed682bc97483d5993b109a 100644 (file)
--- a/api/next/51082.txt
+++ b/api/next/51082.txt
@@ -1,5 +1,6 @@
  pkg go/doc/comment, method (*List) BlankBefore() bool #51082
  pkg go/doc/comment, method (*List) BlankBetween() bool #51082
+pkg go/doc/comment, method (*Parser) Parse(string) *Doc #51082
  pkg go/doc/comment, type Block interface, unexported methods #51082
  pkg go/doc/comment, type Code struct #51082
  pkg go/doc/comment, type Code struct, Text string #51082
@@ -31,5 +32,9 @@ pkg go/doc/comment, type ListItem struct, Content []Block #51082
  pkg go/doc/comment, type ListItem struct, Number string #51082
  pkg go/doc/comment, type Paragraph struct #51082
  pkg go/doc/comment, type Paragraph struct, Text []Text #51082
+pkg go/doc/comment, type Parser struct #51082
+pkg go/doc/comment, type Parser struct, LookupPackage func(string) (string, bool) #51082
+pkg go/doc/comment, type Parser struct, LookupSym func(string, string) bool #51082
+pkg go/doc/comment, type Parser struct, Words map[string]string #51082
  pkg go/doc/comment, type Plain string #51082
  pkg go/doc/comment, type Text interface, unexported methods #51082
diff --git a/src/go/doc/comment/parse.go b/src/go/doc/comment/parse.go

index 12b66794135626e458de50b41982071efda9591d..b12a0d84b95d06574ede94cb80c92d57765331a7 100644 (file)
--- a/src/go/doc/comment/parse.go
+++ b/src/go/doc/comment/parse.go
@@ -174,6 +174,152 @@ type DocLink struct {
  
  func (*DocLink) text() {}
  
+// A Parser is a doc comment parser.
+// The fields in the struct can be filled in before calling Parse
+// in order to customize the details of the parsing process.
+type Parser struct {
+       // Words is a map of Go identifier words that
+       // should be italicized and potentially linked.
+       // If Words[w] is the empty string, then the word w
+       // is only italicized. Otherwise it is linked, using
+       // Words[w] as the link target.
+       // Words corresponds to the [go/doc.ToHTML] words parameter.
+       Words map[string]string
+
+       // LookupPackage resolves a package name to an import path.
+       //
+       // If LookupPackage(name) returns ok == true, then [name]
+       // (or [name.Sym] or [name.Sym.Method])
+       // is considered a documentation link to importPath's package docs.
+       // It is valid to return "", true, in which case name is considered
+       // to refer to the current package.
+       //
+       // If LookupPackage(name) returns ok == false,
+       // then [name] (or [name.Sym] or [name.Sym.Method])
+       // will not be considered a documentation link,
+       // except in the case where name is the full (but single-element) import path
+       // of a package in the standard library, such as in [math] or [io.Reader].
+       // LookupPackage is still called for such names,
+       // in order to permit references to imports of other packages
+       // with the same package names.
+       //
+       // Setting LookupPackage to nil is equivalent to setting it to
+       // a function that always returns "", false.
+       LookupPackage func(name string) (importPath string, ok bool)
+
+       // LookupSym reports whether a symbol name or method name
+       // exists in the current package.
+       //
+       // If LookupSym("", "Name") returns true, then [Name]
+       // is considered a documentation link for a const, func, type, or var.
+       //
+       // Similarly, if LookupSym("Recv", "Name") returns true,
+       // then [Recv.Name] is considered a documentation link for
+       // type Recv's method Name.
+       //
+       // Setting LookupSym to nil is equivalent to setting it to a function
+       // that always returns false.
+       LookupSym func(recv, name string) (ok bool)
+}
+
+// parseDoc is parsing state for a single doc comment.
+type parseDoc struct {
+       *Parser
+       *Doc
+       links     map[string]*LinkDef
+       lines     []string
+       lookupSym func(recv, name string) bool
+}
+
+// Parse parses the doc comment text and returns the *Doc form.
+// Comment markers (/* // and */) in the text must have already been removed.
+func (p *Parser) Parse(text string) *Doc {
+       lines := unindent(strings.Split(text, "\n"))
+       d := &parseDoc{
+               Parser:    p,
+               Doc:       new(Doc),
+               links:     make(map[string]*LinkDef),
+               lines:     lines,
+               lookupSym: func(recv, name string) bool { return false },
+       }
+       if p.LookupSym != nil {
+               d.lookupSym = p.LookupSym
+       }
+
+       // First pass: break into block structure and collect known links.
+       // The text is all recorded as Plain for now.
+       // TODO: Break into actual block structure.
+       for len(lines) > 0 {
+               line := lines[0]
+               if line != "" {
+                       var b Block
+                       b, lines = d.paragraph(lines)
+                       d.Content = append(d.Content, b)
+               } else {
+                       lines = lines[1:]
+               }
+       }
+
+       // Second pass: interpret all the Plain text now that we know the links.
+       // TODO: Actually interpret the plain text.
+
+       return d.Doc
+}
+
+// unindent removes any common space/tab prefix
+// from each line in lines, returning a copy of lines in which
+// those prefixes have been trimmed from each line.
+func unindent(lines []string) []string {
+       // Trim leading and trailing blank lines.
+       for len(lines) > 0 && isBlank(lines[0]) {
+               lines = lines[1:]
+       }
+       for len(lines) > 0 && isBlank(lines[len(lines)-1]) {
+               lines = lines[:len(lines)-1]
+       }
+       if len(lines) == 0 {
+               return nil
+       }
+
+       // Compute and remove common indentation.
+       prefix := leadingSpace(lines[0])
+       for _, line := range lines[1:] {
+               if !isBlank(line) {
+                       prefix = commonPrefix(prefix, leadingSpace(line))
+               }
+       }
+
+       out := make([]string, len(lines))
+       for i, line := range lines {
+               line = strings.TrimPrefix(line, prefix)
+               if strings.TrimSpace(line) == "" {
+                       line = ""
+               }
+               out[i] = line
+       }
+       for len(out) > 0 && out[0] == "" {
+               out = out[1:]
+       }
+       for len(out) > 0 && out[len(out)-1] == "" {
+               out = out[:len(out)-1]
+       }
+       return out
+}
+
+// isBlank reports whether s is a blank line.
+func isBlank(s string) bool {
+       return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
+}
+
+// commonPrefix returns the longest common prefix of a and b.
+func commonPrefix(a, b string) string {
+       i := 0
+       for i < len(a) && i < len(b) && a[i] == b[i] {
+               i++
+       }
+       return a[0:i]
+}
+
  // leadingSpace returns the longest prefix of s consisting of spaces and tabs.
  func leadingSpace(s string) string {
         i := 0
@@ -234,6 +380,27 @@ func isOldHeading(line string, all []string, off int) bool {
         return true
  }
  
+// parargraph returns a paragraph block built from the
+// unindented text at the start of lines, along with the remainder of the lines.
+// If there is no unindented text at the start of lines,
+// then paragraph returns a nil Block.
+func (d *parseDoc) paragraph(lines []string) (b Block, rest []string) {
+       // TODO: Paragraph should be interrupted by any indented line,
+       // which is either a list or a code block,
+       // and of course by a blank line.
+       // It should not be interrupted by a # line - headings must stand alone.
+       i := 0
+       for i < len(lines) && lines[i] != "" {
+               i++
+       }
+       lines, rest = lines[:i], lines[i:]
+       if len(lines) == 0 {
+               return nil, rest
+       }
+
+       return &Paragraph{Text: []Text{Plain(strings.Join(lines, "\n"))}}, rest
+}
+
  // autoURL checks whether s begins with a URL that should be hyperlinked.
  // If so, it returns the URL, which is a prefix of s, and ok == true.
  // Otherwise it returns "", false.
diff --git a/src/go/doc/comment/testdata/hello.txt b/src/go/doc/comment/testdata/hello.txt

new file mode 100644 (file)

index 0000000..4f669fc
--- /dev/null
+++ b/src/go/doc/comment/testdata/hello.txt
@@ -0,0 +1,16 @@
+-- input --
+Hello,
+world
+
+This is
+a test.
+-- dump --
+Doc
+       Paragraph
+               Plain
+                       "Hello,\n"
+                       "world"
+       Paragraph
+               Plain
+                       "This is\n"
+                       "a test."
diff --git a/src/go/doc/comment/testdata_test.go b/src/go/doc/comment/testdata_test.go

new file mode 100644 (file)

index 0000000..a94e76c
--- /dev/null
+++ b/src/go/doc/comment/testdata_test.go
@@ -0,0 +1,168 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package comment
+
+import (
+       "bytes"
+       "fmt"
+       "internal/diff"
+       "internal/txtar"
+       "path/filepath"
+       "strings"
+       "testing"
+)
+
+func TestTestdata(t *testing.T) {
+       files, _ := filepath.Glob("testdata/*.txt")
+       if len(files) == 0 {
+               t.Fatalf("no testdata")
+       }
+       var p Parser
+
+       stripDollars := func(b []byte) []byte {
+               // Remove trailing $ on lines.
+               // They make it easier to see lines with trailing spaces,
+               // as well as turning them into lines without trailing spaces,
+               // in case editors remove trailing spaces.
+               return bytes.ReplaceAll(b, []byte("$\n"), []byte("\n"))
+       }
+       for _, file := range files {
+               t.Run(filepath.Base(file), func(t *testing.T) {
+                       a, err := txtar.ParseFile(file)
+                       if err != nil {
+                               t.Fatal(err)
+                       }
+                       if len(a.Files) < 1 || a.Files[0].Name != "input" {
+                               t.Fatalf("first file is not %q", "input")
+                       }
+                       d := p.Parse(string(stripDollars(a.Files[0].Data)))
+                       for _, f := range a.Files[1:] {
+                               want := stripDollars(f.Data)
+                               for len(want) >= 2 && want[len(want)-1] == '\n' && want[len(want)-2] == '\n' {
+                                       want = want[:len(want)-1]
+                               }
+                               var out []byte
+                               switch f.Name {
+                               default:
+                                       t.Fatalf("unknown output file %q", f.Name)
+                               case "dump":
+                                       out = dump(d)
+                               }
+                               if string(out) != string(want) {
+                                       t.Errorf("%s: %s", file, diff.Diff(f.Name, want, "have", out))
+                               }
+                       }
+               })
+       }
+}
+
+func dump(d *Doc) []byte {
+       var out bytes.Buffer
+       dumpTo(&out, 0, d)
+       return out.Bytes()
+}
+
+func dumpTo(out *bytes.Buffer, indent int, x any) {
+       switch x := x.(type) {
+       default:
+               fmt.Fprintf(out, "?%T", x)
+
+       case *Doc:
+               fmt.Fprintf(out, "Doc")
+               dumpTo(out, indent+1, x.Content)
+               if len(x.Links) > 0 {
+                       dumpNL(out, indent+1)
+                       fmt.Fprintf(out, "Links")
+                       dumpTo(out, indent+2, x.Links)
+               }
+               fmt.Fprintf(out, "\n")
+
+       case []*LinkDef:
+               for _, def := range x {
+                       dumpNL(out, indent)
+                       dumpTo(out, indent, def)
+               }
+
+       case *LinkDef:
+               fmt.Fprintf(out, "LinkDef Used:%v Text:%q URL:%s", x.Used, x.Text, x.URL)
+
+       case []Block:
+               for _, blk := range x {
+                       dumpNL(out, indent)
+                       dumpTo(out, indent, blk)
+               }
+
+       case *Heading:
+               fmt.Fprintf(out, "Heading")
+               dumpTo(out, indent+1, x.Text)
+
+       case *List:
+               fmt.Fprintf(out, "List ForceBlankBefore=%v ForceBlankBetween=%v", x.ForceBlankBefore, x.ForceBlankBetween)
+               dumpTo(out, indent+1, x.Items)
+
+       case []*ListItem:
+               for _, item := range x {
+                       dumpNL(out, indent)
+                       dumpTo(out, indent, item)
+               }
+
+       case *ListItem:
+               fmt.Fprintf(out, "Item Number=%q", x.Number)
+               dumpTo(out, indent+1, x.Content)
+
+       case *Paragraph:
+               fmt.Fprintf(out, "Paragraph")
+               dumpTo(out, indent+1, x.Text)
+
+       case *Code:
+               fmt.Fprintf(out, "Code")
+               dumpTo(out, indent+1, x.Text)
+
+       case []Text:
+               for _, t := range x {
+                       dumpNL(out, indent)
+                       dumpTo(out, indent, t)
+               }
+
+       case Plain:
+               if !strings.Contains(string(x), "\n") {
+                       fmt.Fprintf(out, "Plain %q", string(x))
+               } else {
+                       fmt.Fprintf(out, "Plain")
+                       dumpTo(out, indent+1, string(x))
+               }
+
+       case Italic:
+               if !strings.Contains(string(x), "\n") {
+                       fmt.Fprintf(out, "Italic %q", string(x))
+               } else {
+                       fmt.Fprintf(out, "Italic")
+                       dumpTo(out, indent+1, string(x))
+               }
+
+       case string:
+               for _, line := range strings.SplitAfter(x, "\n") {
+                       if line != "" {
+                               dumpNL(out, indent)
+                               fmt.Fprintf(out, "%q", line)
+                       }
+               }
+
+       case *Link:
+               fmt.Fprintf(out, "Link %q", x.URL)
+               dumpTo(out, indent+1, x.Text)
+
+       case *DocLink:
+               fmt.Fprintf(out, "DocLink pkg:%q, recv:%q, name:%q", x.ImportPath, x.Recv, x.Name)
+               dumpTo(out, indent+1, x.Text)
+       }
+}
+
+func dumpNL(out *bytes.Buffer, n int) {
+       out.WriteByte('\n')
+       for i := 0; i < n; i++ {
+               out.WriteByte('\t')
+       }
+}
author	Russ Cox <rsc@golang.org>
	Sun, 3 Apr 2022 12:15:40 +0000 (08:15 -0400)
committer	Russ Cox <rsc@golang.org>
	Mon, 11 Apr 2022 16:31:37 +0000 (16:31 +0000)
api/next/51082.txt		patch \| blob \| history
src/go/doc/comment/parse.go		patch \| blob \| history
src/go/doc/comment/testdata/hello.txt	[new file with mode: 0644]	patch \| blob
src/go/doc/comment/testdata_test.go	[new file with mode: 0644]	patch \| blob