From 1581bb9843815b3393779b28f7c501e2052486ce Mon Sep 17 00:00:00 2001
From: =?utf8?q?Daniel=20Mart=C3=AD?= <mvdan@mvdan.cc>
Date: Tue, 27 Aug 2019 22:47:37 +0200
Subject: [PATCH] cmd/compile: stop using go/types in rulegen
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Using go/types to get rid of all unused variables in CL 189798 was a
neat idea, but it was pretty expensive. go/types is a full typechecker,
which does a lot more work than we actually need. Moreover, we had to
run it multiple times, to catch variables that became unused after
removing existing unused variables.

Instead, write our own little detector for unused imports and variables.
It doesn't use ast.Walk, as we need to know what fields we're
inspecting. For example, in "foo := bar", "foo" is declared, and "bar"
is used, yet they both appear as simple *ast.Ident cases under ast.Walk.

The code is documented to explain how unused variables are detected in a
single syntax tree pass. Since this happens after we've generated a
complete go/ast.File, we don't need to worry about our own simplified
node types.

The generated code is the same, but rulegen is much faster and uses less
memory at its peak, so it should scale better with time.

With 'benchcmd Rulegen go run *.go' on perflock, we get:

	name     old time/op         new time/op         delta
	Rulegen          4.00s Â± 0%          3.41s Â± 1%  -14.70%  (p=0.008 n=5+5)

	name     old user-time/op    new user-time/op    delta
	Rulegen          14.1s Â± 1%          10.6s Â± 1%  -24.62%  (p=0.008 n=5+5)

	name     old sys-time/op     new sys-time/op     delta
	Rulegen          318ms Â±26%          263ms Â± 9%     ~     (p=0.056 n=5+5)

	name     old peak-RSS-bytes  new peak-RSS-bytes  delta
	Rulegen          231MB Â± 4%          181MB Â± 3%  -21.69%  (p=0.008 n=5+5)

Change-Id: I8387d52818f6131357868ad348dac8c96d926191
Reviewed-on: https://go-review.googlesource.com/c/go/+/191782
Run-TryBot: Daniel MartÃ­ <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
---
 src/cmd/compile/internal/ssa/gen/rulegen.go | 285 ++++++++++++++++++--
 1 file changed, 257 insertions(+), 28 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/gen/rulegen.go b/src/cmd/compile/internal/ssa/gen/rulegen.go
index 2df2e1234b..3a18ca252c 100644
--- a/src/cmd/compile/internal/ssa/gen/rulegen.go
+++ b/src/cmd/compile/internal/ssa/gen/rulegen.go
@@ -21,12 +21,13 @@ import (
 	"go/parser"
 	"go/printer"
 	"go/token"
-	"go/types"
 	"io"
 	"log"
 	"os"
+	"path"
 	"regexp"
 	"sort"
+	"strconv"
 	"strings"
 
 	"golang.org/x/tools/go/ast/astutil"
@@ -266,38 +267,38 @@ func genRulesSuffix(arch arch, suff string) {
 	}
 	tfile := fset.File(file.Pos())
 
-	for n := 0; n < 3; n++ {
-		unused := make(map[token.Pos]bool)
-		conf := types.Config{Error: func(err error) {
-			if terr, ok := err.(types.Error); ok && strings.Contains(terr.Msg, "not used") {
-				unused[terr.Pos] = true
-			}
-		}}
-		_, _ = conf.Check("ssa", fset, []*ast.File{file}, nil)
-		if len(unused) == 0 {
-			break
-		}
-		pre := func(c *astutil.Cursor) bool {
-			if node := c.Node(); node != nil && unused[node.Pos()] {
-				c.Delete()
-				// Unused imports and declarations use exactly
-				// one line. Prevent leaving an empty line.
-				tfile.MergeLine(tfile.Position(node.Pos()).Line)
-				return false
-			}
+	// First, use unusedInspector to find the unused declarations by their
+	// start position.
+	u := unusedInspector{unused: make(map[token.Pos]bool)}
+	u.node(file)
+
+	// Then, delete said nodes via astutil.Apply.
+	pre := func(c *astutil.Cursor) bool {
+		node := c.Node()
+		if node == nil {
 			return true
 		}
-		post := func(c *astutil.Cursor) bool {
-			switch node := c.Node().(type) {
-			case *ast.GenDecl:
-				if len(node.Specs) == 0 {
-					c.Delete()
-				}
+		if u.unused[node.Pos()] {
+			c.Delete()
+			// Unused imports and declarations use exactly
+			// one line. Prevent leaving an empty line.
+			tfile.MergeLine(tfile.Position(node.Pos()).Line)
+			return false
+		}
+		return true
+	}
+	post := func(c *astutil.Cursor) bool {
+		switch node := c.Node().(type) {
+		case *ast.GenDecl:
+			if len(node.Specs) == 0 {
+				// Don't leave a broken or empty GenDecl behind,
+				// such as "import ()".
+				c.Delete()
 			}
-			return true
 		}
-		file = astutil.Apply(file, pre, post).(*ast.File)
+		return true
 	}
+	file = astutil.Apply(file, pre, post).(*ast.File)
 
 	// Write the well-formatted source to file
 	f, err := os.Create("../rewrite" + arch.name + suff + ".go")
@@ -319,6 +320,234 @@ func genRulesSuffix(arch arch, suff string) {
 	}
 }
 
+// unusedInspector can be used to detect unused variables and imports in an
+// ast.Node via its node method. The result is available in the "unused" map.
+//
+// note that unusedInspector is lazy and best-effort; it only supports the node
+// types and patterns used by the rulegen program.
+type unusedInspector struct {
+	// scope is the current scope, which can never be nil when a declaration
+	// is encountered. That is, the unusedInspector.node entrypoint should
+	// generally be an entire file or block.
+	scope *scope
+
+	// unused is the resulting set of unused declared names, indexed by the
+	// starting position of the node that declared the name.
+	unused map[token.Pos]bool
+
+	// defining is the object currently being defined; this is useful so
+	// that if "foo := bar" is unused and removed, we can then detect if
+	// "bar" becomes unused as well.
+	defining *object
+}
+
+// scoped opens a new scope when called, and returns a function which closes
+// that same scope. When a scope is closed, unused variables are recorded.
+func (u *unusedInspector) scoped() func() {
+	outer := u.scope
+	u.scope = &scope{outer: outer, objects: map[string]*object{}}
+	return func() {
+		for anyUnused := true; anyUnused; {
+			anyUnused = false
+			for _, obj := range u.scope.objects {
+				if obj.numUses > 0 {
+					continue
+				}
+				u.unused[obj.pos] = true
+				for _, used := range obj.used {
+					if used.numUses--; used.numUses == 0 {
+						anyUnused = true
+					}
+				}
+				// We've decremented numUses for each of the
+				// objects in used. Zero this slice too, to keep
+				// everything consistent.
+				obj.used = nil
+			}
+		}
+		u.scope = outer
+	}
+}
+
+func (u *unusedInspector) exprs(list []ast.Expr) {
+	for _, x := range list {
+		u.node(x)
+	}
+}
+
+func (u *unusedInspector) stmts(list []ast.Stmt) {
+	for _, x := range list {
+		u.node(x)
+	}
+}
+
+func (u *unusedInspector) decls(list []ast.Decl) {
+	for _, x := range list {
+		u.node(x)
+	}
+}
+
+func (u *unusedInspector) node(node ast.Node) {
+	switch node := node.(type) {
+	case *ast.File:
+		defer u.scoped()()
+		u.decls(node.Decls)
+	case *ast.GenDecl:
+		for _, spec := range node.Specs {
+			u.node(spec)
+		}
+	case *ast.ImportSpec:
+		impPath, _ := strconv.Unquote(node.Path.Value)
+		name := path.Base(impPath)
+		u.scope.objects[name] = &object{
+			name: name,
+			pos:  node.Pos(),
+		}
+	case *ast.FuncDecl:
+		u.node(node.Type)
+		if node.Body != nil {
+			u.node(node.Body)
+		}
+	case *ast.FuncType:
+		if node.Params != nil {
+			u.node(node.Params)
+		}
+		if node.Results != nil {
+			u.node(node.Results)
+		}
+	case *ast.FieldList:
+		for _, field := range node.List {
+			u.node(field)
+		}
+	case *ast.Field:
+		u.node(node.Type)
+
+	// statements
+
+	case *ast.BlockStmt:
+		defer u.scoped()()
+		u.stmts(node.List)
+	case *ast.IfStmt:
+		if node.Init != nil {
+			u.node(node.Init)
+		}
+		u.node(node.Cond)
+		u.node(node.Body)
+		if node.Else != nil {
+			u.node(node.Else)
+		}
+	case *ast.ForStmt:
+		if node.Init != nil {
+			u.node(node.Init)
+		}
+		if node.Cond != nil {
+			u.node(node.Cond)
+		}
+		if node.Post != nil {
+			u.node(node.Post)
+		}
+		u.node(node.Body)
+	case *ast.SwitchStmt:
+		if node.Init != nil {
+			u.node(node.Init)
+		}
+		if node.Tag != nil {
+			u.node(node.Tag)
+		}
+		u.node(node.Body)
+	case *ast.CaseClause:
+		u.exprs(node.List)
+		defer u.scoped()()
+		u.stmts(node.Body)
+	case *ast.BranchStmt:
+	case *ast.ExprStmt:
+		u.node(node.X)
+	case *ast.AssignStmt:
+		if node.Tok != token.DEFINE {
+			u.exprs(node.Rhs)
+			u.exprs(node.Lhs)
+			break
+		}
+		if len(node.Lhs) != 1 {
+			panic("no support for := with multiple names")
+		}
+
+		name := node.Lhs[0].(*ast.Ident)
+		obj := &object{
+			name: name.Name,
+			pos:  name.NamePos,
+		}
+
+		old := u.defining
+		u.defining = obj
+		u.exprs(node.Rhs)
+		u.defining = old
+
+		u.scope.objects[name.Name] = obj
+	case *ast.ReturnStmt:
+		u.exprs(node.Results)
+
+	// expressions
+
+	case *ast.CallExpr:
+		u.node(node.Fun)
+		u.exprs(node.Args)
+	case *ast.SelectorExpr:
+		u.node(node.X)
+	case *ast.UnaryExpr:
+		u.node(node.X)
+	case *ast.BinaryExpr:
+		u.node(node.X)
+		u.node(node.Y)
+	case *ast.StarExpr:
+		u.node(node.X)
+	case *ast.ParenExpr:
+		u.node(node.X)
+	case *ast.IndexExpr:
+		u.node(node.X)
+		u.node(node.Index)
+	case *ast.TypeAssertExpr:
+		u.node(node.X)
+		u.node(node.Type)
+	case *ast.Ident:
+		if obj := u.scope.Lookup(node.Name); obj != nil {
+			obj.numUses++
+			if u.defining != nil {
+				u.defining.used = append(u.defining.used, obj)
+			}
+		}
+	case *ast.BasicLit:
+	default:
+		panic(fmt.Sprintf("unhandled node: %T", node))
+	}
+}
+
+// scope keeps track of a certain scope and its declared names, as well as the
+// outer (parent) scope.
+type scope struct {
+	outer   *scope             // can be nil, if this is the top-level scope
+	objects map[string]*object // indexed by each declared name
+}
+
+func (s *scope) Lookup(name string) *object {
+	if obj := s.objects[name]; obj != nil {
+		return obj
+	}
+	if s.outer == nil {
+		return nil
+	}
+	return s.outer.Lookup(name)
+}
+
+// object keeps track of a declared name, such as a variable or import.
+type object struct {
+	name string
+	pos  token.Pos // start position of the node declaring the object
+
+	numUses int       // number of times this object is used
+	used    []*object // objects that its declaration makes use of
+}
+
 func fprint(w io.Writer, n Node) {
 	switch n := n.(type) {
 	case *File:
-- 
2.51.0