[This CL is part of a sequence implementing the proposal #51082.
The design doc is at https://go.dev/s/godocfmt-design.]
Implement parsing of plain text doc paragraphs,
as well as a txtar-based test framework. Subsequent CLs will
implement the rest of the possible markup.
For #51082.
Change-Id: I449aac69b44089f241fde8050ac134e17cb25116
Reviewed-on: https://go-review.googlesource.com/c/go/+/397278
Run-TryBot: Russ Cox <rsc@golang.org>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
pkg go/doc/comment, method (*List) BlankBefore() bool #51082
pkg go/doc/comment, method (*List) BlankBetween() bool #51082
+pkg go/doc/comment, method (*Parser) Parse(string) *Doc #51082
pkg go/doc/comment, type Block interface, unexported methods #51082
pkg go/doc/comment, type Code struct #51082
pkg go/doc/comment, type Code struct, Text string #51082
pkg go/doc/comment, type ListItem struct, Number string #51082
pkg go/doc/comment, type Paragraph struct #51082
pkg go/doc/comment, type Paragraph struct, Text []Text #51082
+pkg go/doc/comment, type Parser struct #51082
+pkg go/doc/comment, type Parser struct, LookupPackage func(string) (string, bool) #51082
+pkg go/doc/comment, type Parser struct, LookupSym func(string, string) bool #51082
+pkg go/doc/comment, type Parser struct, Words map[string]string #51082
pkg go/doc/comment, type Plain string #51082
pkg go/doc/comment, type Text interface, unexported methods #51082
func (*DocLink) text() {}
+// A Parser is a doc comment parser.
+// The fields in the struct can be filled in before calling Parse
+// in order to customize the details of the parsing process.
+type Parser struct {
+ // Words is a map of Go identifier words that
+ // should be italicized and potentially linked.
+ // If Words[w] is the empty string, then the word w
+ // is only italicized. Otherwise it is linked, using
+ // Words[w] as the link target.
+ // Words corresponds to the [go/doc.ToHTML] words parameter.
+ Words map[string]string
+
+ // LookupPackage resolves a package name to an import path.
+ //
+ // If LookupPackage(name) returns ok == true, then [name]
+ // (or [name.Sym] or [name.Sym.Method])
+ // is considered a documentation link to importPath's package docs.
+ // It is valid to return "", true, in which case name is considered
+ // to refer to the current package.
+ //
+ // If LookupPackage(name) returns ok == false,
+ // then [name] (or [name.Sym] or [name.Sym.Method])
+ // will not be considered a documentation link,
+ // except in the case where name is the full (but single-element) import path
+ // of a package in the standard library, such as in [math] or [io.Reader].
+ // LookupPackage is still called for such names,
+ // in order to permit references to imports of other packages
+ // with the same package names.
+ //
+ // Setting LookupPackage to nil is equivalent to setting it to
+ // a function that always returns "", false.
+ LookupPackage func(name string) (importPath string, ok bool)
+
+ // LookupSym reports whether a symbol name or method name
+ // exists in the current package.
+ //
+ // If LookupSym("", "Name") returns true, then [Name]
+ // is considered a documentation link for a const, func, type, or var.
+ //
+ // Similarly, if LookupSym("Recv", "Name") returns true,
+ // then [Recv.Name] is considered a documentation link for
+ // type Recv's method Name.
+ //
+ // Setting LookupSym to nil is equivalent to setting it to a function
+ // that always returns false.
+ LookupSym func(recv, name string) (ok bool)
+}
+
+// parseDoc is parsing state for a single doc comment.
+type parseDoc struct {
+ *Parser
+ *Doc
+ links map[string]*LinkDef
+ lines []string
+ lookupSym func(recv, name string) bool
+}
+
+// Parse parses the doc comment text and returns the *Doc form.
+// Comment markers (/* // and */) in the text must have already been removed.
+func (p *Parser) Parse(text string) *Doc {
+ lines := unindent(strings.Split(text, "\n"))
+ d := &parseDoc{
+ Parser: p,
+ Doc: new(Doc),
+ links: make(map[string]*LinkDef),
+ lines: lines,
+ lookupSym: func(recv, name string) bool { return false },
+ }
+ if p.LookupSym != nil {
+ d.lookupSym = p.LookupSym
+ }
+
+ // First pass: break into block structure and collect known links.
+ // The text is all recorded as Plain for now.
+ // TODO: Break into actual block structure.
+ for len(lines) > 0 {
+ line := lines[0]
+ if line != "" {
+ var b Block
+ b, lines = d.paragraph(lines)
+ d.Content = append(d.Content, b)
+ } else {
+ lines = lines[1:]
+ }
+ }
+
+ // Second pass: interpret all the Plain text now that we know the links.
+ // TODO: Actually interpret the plain text.
+
+ return d.Doc
+}
+
+// unindent removes any common space/tab prefix
+// from each line in lines, returning a copy of lines in which
+// those prefixes have been trimmed from each line.
+func unindent(lines []string) []string {
+ // Trim leading and trailing blank lines.
+ for len(lines) > 0 && isBlank(lines[0]) {
+ lines = lines[1:]
+ }
+ for len(lines) > 0 && isBlank(lines[len(lines)-1]) {
+ lines = lines[:len(lines)-1]
+ }
+ if len(lines) == 0 {
+ return nil
+ }
+
+ // Compute and remove common indentation.
+ prefix := leadingSpace(lines[0])
+ for _, line := range lines[1:] {
+ if !isBlank(line) {
+ prefix = commonPrefix(prefix, leadingSpace(line))
+ }
+ }
+
+ out := make([]string, len(lines))
+ for i, line := range lines {
+ line = strings.TrimPrefix(line, prefix)
+ if strings.TrimSpace(line) == "" {
+ line = ""
+ }
+ out[i] = line
+ }
+ for len(out) > 0 && out[0] == "" {
+ out = out[1:]
+ }
+ for len(out) > 0 && out[len(out)-1] == "" {
+ out = out[:len(out)-1]
+ }
+ return out
+}
+
+// isBlank reports whether s is a blank line.
+func isBlank(s string) bool {
+ return len(s) == 0 || (len(s) == 1 && s[0] == '\n')
+}
+
+// commonPrefix returns the longest common prefix of a and b.
+func commonPrefix(a, b string) string {
+ i := 0
+ for i < len(a) && i < len(b) && a[i] == b[i] {
+ i++
+ }
+ return a[0:i]
+}
+
// leadingSpace returns the longest prefix of s consisting of spaces and tabs.
func leadingSpace(s string) string {
i := 0
return true
}
+// parargraph returns a paragraph block built from the
+// unindented text at the start of lines, along with the remainder of the lines.
+// If there is no unindented text at the start of lines,
+// then paragraph returns a nil Block.
+func (d *parseDoc) paragraph(lines []string) (b Block, rest []string) {
+ // TODO: Paragraph should be interrupted by any indented line,
+ // which is either a list or a code block,
+ // and of course by a blank line.
+ // It should not be interrupted by a # line - headings must stand alone.
+ i := 0
+ for i < len(lines) && lines[i] != "" {
+ i++
+ }
+ lines, rest = lines[:i], lines[i:]
+ if len(lines) == 0 {
+ return nil, rest
+ }
+
+ return &Paragraph{Text: []Text{Plain(strings.Join(lines, "\n"))}}, rest
+}
+
// autoURL checks whether s begins with a URL that should be hyperlinked.
// If so, it returns the URL, which is a prefix of s, and ok == true.
// Otherwise it returns "", false.
--- /dev/null
+-- input --
+Hello,
+world
+
+This is
+a test.
+-- dump --
+Doc
+ Paragraph
+ Plain
+ "Hello,\n"
+ "world"
+ Paragraph
+ Plain
+ "This is\n"
+ "a test."
--- /dev/null
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package comment
+
+import (
+ "bytes"
+ "fmt"
+ "internal/diff"
+ "internal/txtar"
+ "path/filepath"
+ "strings"
+ "testing"
+)
+
+func TestTestdata(t *testing.T) {
+ files, _ := filepath.Glob("testdata/*.txt")
+ if len(files) == 0 {
+ t.Fatalf("no testdata")
+ }
+ var p Parser
+
+ stripDollars := func(b []byte) []byte {
+ // Remove trailing $ on lines.
+ // They make it easier to see lines with trailing spaces,
+ // as well as turning them into lines without trailing spaces,
+ // in case editors remove trailing spaces.
+ return bytes.ReplaceAll(b, []byte("$\n"), []byte("\n"))
+ }
+ for _, file := range files {
+ t.Run(filepath.Base(file), func(t *testing.T) {
+ a, err := txtar.ParseFile(file)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if len(a.Files) < 1 || a.Files[0].Name != "input" {
+ t.Fatalf("first file is not %q", "input")
+ }
+ d := p.Parse(string(stripDollars(a.Files[0].Data)))
+ for _, f := range a.Files[1:] {
+ want := stripDollars(f.Data)
+ for len(want) >= 2 && want[len(want)-1] == '\n' && want[len(want)-2] == '\n' {
+ want = want[:len(want)-1]
+ }
+ var out []byte
+ switch f.Name {
+ default:
+ t.Fatalf("unknown output file %q", f.Name)
+ case "dump":
+ out = dump(d)
+ }
+ if string(out) != string(want) {
+ t.Errorf("%s: %s", file, diff.Diff(f.Name, want, "have", out))
+ }
+ }
+ })
+ }
+}
+
+func dump(d *Doc) []byte {
+ var out bytes.Buffer
+ dumpTo(&out, 0, d)
+ return out.Bytes()
+}
+
+func dumpTo(out *bytes.Buffer, indent int, x any) {
+ switch x := x.(type) {
+ default:
+ fmt.Fprintf(out, "?%T", x)
+
+ case *Doc:
+ fmt.Fprintf(out, "Doc")
+ dumpTo(out, indent+1, x.Content)
+ if len(x.Links) > 0 {
+ dumpNL(out, indent+1)
+ fmt.Fprintf(out, "Links")
+ dumpTo(out, indent+2, x.Links)
+ }
+ fmt.Fprintf(out, "\n")
+
+ case []*LinkDef:
+ for _, def := range x {
+ dumpNL(out, indent)
+ dumpTo(out, indent, def)
+ }
+
+ case *LinkDef:
+ fmt.Fprintf(out, "LinkDef Used:%v Text:%q URL:%s", x.Used, x.Text, x.URL)
+
+ case []Block:
+ for _, blk := range x {
+ dumpNL(out, indent)
+ dumpTo(out, indent, blk)
+ }
+
+ case *Heading:
+ fmt.Fprintf(out, "Heading")
+ dumpTo(out, indent+1, x.Text)
+
+ case *List:
+ fmt.Fprintf(out, "List ForceBlankBefore=%v ForceBlankBetween=%v", x.ForceBlankBefore, x.ForceBlankBetween)
+ dumpTo(out, indent+1, x.Items)
+
+ case []*ListItem:
+ for _, item := range x {
+ dumpNL(out, indent)
+ dumpTo(out, indent, item)
+ }
+
+ case *ListItem:
+ fmt.Fprintf(out, "Item Number=%q", x.Number)
+ dumpTo(out, indent+1, x.Content)
+
+ case *Paragraph:
+ fmt.Fprintf(out, "Paragraph")
+ dumpTo(out, indent+1, x.Text)
+
+ case *Code:
+ fmt.Fprintf(out, "Code")
+ dumpTo(out, indent+1, x.Text)
+
+ case []Text:
+ for _, t := range x {
+ dumpNL(out, indent)
+ dumpTo(out, indent, t)
+ }
+
+ case Plain:
+ if !strings.Contains(string(x), "\n") {
+ fmt.Fprintf(out, "Plain %q", string(x))
+ } else {
+ fmt.Fprintf(out, "Plain")
+ dumpTo(out, indent+1, string(x))
+ }
+
+ case Italic:
+ if !strings.Contains(string(x), "\n") {
+ fmt.Fprintf(out, "Italic %q", string(x))
+ } else {
+ fmt.Fprintf(out, "Italic")
+ dumpTo(out, indent+1, string(x))
+ }
+
+ case string:
+ for _, line := range strings.SplitAfter(x, "\n") {
+ if line != "" {
+ dumpNL(out, indent)
+ fmt.Fprintf(out, "%q", line)
+ }
+ }
+
+ case *Link:
+ fmt.Fprintf(out, "Link %q", x.URL)
+ dumpTo(out, indent+1, x.Text)
+
+ case *DocLink:
+ fmt.Fprintf(out, "DocLink pkg:%q, recv:%q, name:%q", x.ImportPath, x.Recv, x.Name)
+ dumpTo(out, indent+1, x.Text)
+ }
+}
+
+func dumpNL(out *bytes.Buffer, n int) {
+ out.WriteByte('\n')
+ for i := 0; i < n; i++ {
+ out.WriteByte('\t')
+ }
+}