From dab9c502c4192d90b6cef6c9e54dc98f2aee932f Mon Sep 17 00:00:00 2001 From: Than McIntosh Date: Mon, 24 Jul 2023 10:58:26 -0400 Subject: [PATCH] cmd/compile/internal/inline: add callsite trace output debugging flag Add a new debug flag "-d=dumpinlcallsitescores" that dumps out a summary of all callsites in the package being compiled with info on inlining heuristics, for human consumption. Sample output lines: Score Adjustment Status Callee CallerPos ScoreFlags ... 115 40 DEMOTED cmd/compile/internal/abi.(*ABIParamAssignment).Offset expand_calls.go:1679:14|6 panicPathAdj ... 76 -5 PROMOTED runtime.persistentalloc mcheckmark.go:48:45|3 inLoopAdj ... 201 0 --- PGO unicode.DecodeRuneInString utf8.go:312:30|1 ... 7 -5 --- PGO internal/abi.Name.DataChecked type.go:625:22|0 inLoopAdj Here "Score" is the final score calculated for the callsite, "Adjustment" is the amount added to or subtracted from the original hairyness estimate to form the score. "Status" shows whether anything changed with the site -- did the adjustment bump it down just below the threshold ("PROMOTED") or instead bump it above the threshold ("DEMOTED") or did nothing happen as a result of the heuristics ("---"); "Status" also shows whether PGO was involved. "Callee" is the name of the function called, "CallerPos" is the position of the callsite, and "ScoreFlags" is a digest of the specific properties we used to make adjustments to callsite score via heuristics. Change-Id: Iea4b1cbfee038bc68df6ab81e9973f145636300b Reviewed-on: https://go-review.googlesource.com/c/go/+/513455 Reviewed-by: Matthew Dempsky LUCI-TryBot-Result: Go LUCI --- src/cmd/compile/internal/base/debug.go | 1 + src/cmd/compile/internal/inline/inl.go | 18 ++- .../internal/inline/inlheur/callsite.go | 8 +- .../internal/inline/inlheur/scoring.go | 115 ++++++++++++++++++ 4 files changed, 138 insertions(+), 4 deletions(-) diff --git a/src/cmd/compile/internal/base/debug.go b/src/cmd/compile/internal/base/debug.go index 21e8a31d1f..244ba1bc9d 100644 --- a/src/cmd/compile/internal/base/debug.go +++ b/src/cmd/compile/internal/base/debug.go @@ -22,6 +22,7 @@ type DebugFlags struct { Defer int `help:"print information about defer compilation"` DisableNil int `help:"disable nil checks" concurrent:"ok"` DumpInlFuncProps string `help:"dump function properties from inl heuristics to specified file"` + DumpInlCallSiteScores int `help:"dump scored callsites during inlining"` DumpPtrs int `help:"show Node pointers values in dump output"` DwarfInl int `help:"print information about DWARF inlined function creation"` EscapeMutationsCalls int `help:"print extra escape analysis diagnostics about mutations and calls" concurrent:"ok"` diff --git a/src/cmd/compile/internal/inline/inl.go b/src/cmd/compile/internal/inline/inl.go index 436b353eb9..f1dce85afb 100644 --- a/src/cmd/compile/internal/inline/inl.go +++ b/src/cmd/compile/internal/inline/inl.go @@ -172,6 +172,9 @@ func InlinePackage(p *pgo.Profile) { if base.Debug.DumpInlFuncProps != "" { inlheur.DumpFuncProps(nil, base.Debug.DumpInlFuncProps, nil) } + if goexperiment.NewInliner { + postProcessCallSites(p) + } } // InlineDecls applies inlining to the given batch of declarations. @@ -339,9 +342,10 @@ func CanInline(fn *ir.Func, profile *pgo.Profile) { } // Used a "relaxed" inline budget if goexperiment.NewInliner is in - // effect, or if we're producing a debugging dump for unit testing. + // effect, or if we're producing a debugging dump. relaxed := goexperiment.NewInliner || - (base.Debug.DumpInlFuncProps != "") + (base.Debug.DumpInlFuncProps != "" || + base.Debug.DumpInlCallSiteScores != 0) // Compute the inline budget for this func. budget := inlineBudget(fn, profile, relaxed, base.Debug.PGODebug > 0) @@ -1286,3 +1290,13 @@ func isAtomicCoverageCounterUpdate(cn *ir.CallExpr) bool { v := isIndexingCoverageCounter(adn.X) return v } + +func postProcessCallSites(profile *pgo.Profile) { + if base.Debug.DumpInlCallSiteScores != 0 { + budgetCallback := func(fn *ir.Func, prof *pgo.Profile) (int32, bool) { + v := inlineBudget(fn, prof, false, false) + return v, v == inlineHotMaxBudget + } + inlheur.DumpInlCallSiteScores(profile, budgetCallback) + } +} diff --git a/src/cmd/compile/internal/inline/inlheur/callsite.go b/src/cmd/compile/internal/inline/inlheur/callsite.go index 5b75a67243..2e42cc28da 100644 --- a/src/cmd/compile/internal/inline/inlheur/callsite.go +++ b/src/cmd/compile/internal/inline/inlheur/callsite.go @@ -52,6 +52,10 @@ func GetCallSiteScore(ce *ir.CallExpr) (bool, int) { return true, cs.Score } +func CallSiteTable() CallSiteTab { + return cstab +} + type CSPropBits uint32 const ( @@ -98,7 +102,7 @@ func fmtFullPos(p src.XPos) string { return sb.String() } -func encodeCallSiteKey(cs *CallSite) string { +func EncodeCallSiteKey(cs *CallSite) string { var sb strings.Builder // FIXME: maybe rewrite line offsets relative to function start? sb.WriteString(fmtFullPos(cs.Call.Pos())) @@ -109,7 +113,7 @@ func encodeCallSiteKey(cs *CallSite) string { func buildEncodedCallSiteTab(tab CallSiteTab) encodedCallSiteTab { r := make(encodedCallSiteTab) for _, cs := range tab { - k := encodeCallSiteKey(cs) + k := EncodeCallSiteKey(cs) r[k] = propsAndScore{ props: cs.Flags, score: cs.Score, diff --git a/src/cmd/compile/internal/inline/inlheur/scoring.go b/src/cmd/compile/internal/inline/inlheur/scoring.go index 82a9c2acac..933e7e0701 100644 --- a/src/cmd/compile/internal/inline/inlheur/scoring.go +++ b/src/cmd/compile/internal/inline/inlheur/scoring.go @@ -5,10 +5,14 @@ package inlheur import ( + "cmd/compile/internal/base" "cmd/compile/internal/ir" + "cmd/compile/internal/pgo" "cmd/compile/internal/typecheck" + "cmd/compile/internal/types" "fmt" "os" + "sort" ) // These constants enumerate the set of possible ways/scenarios @@ -230,3 +234,114 @@ func adjustScore(typ scoreAdjustTyp, score int, mask scoreAdjustTyp) (int, score } return score, mask } + +// DumpInlCallSiteScores is invoked by the inliner if the debug flag +// "-d=dumpinlcallsitescores" is set; it dumps out a human-readable +// summary of all (potentially) inlinable callsites in the package, +// along with info on call site scoring and the adjustments made to a +// given score. Here profile is the PGO profile in use (may be +// nil), budgetCallback is a callback that can be invoked to find out +// the original pre-adjustment hairyness limit for the function, and +// inlineHotMaxBudget is the constant of the same name used in the +// inliner. Sample output lines: +// +// Score Adjustment Status Callee CallerPos ScoreFlags +// 115 40 DEMOTED cmd/compile/internal/abi.(*ABIParamAssignment).Offset expand_calls.go:1679:14|6 panicPathAdj +// 76 -5n PROMOTED runtime.persistentalloc mcheckmark.go:48:45|3 inLoopAdj +// 201 0 --- PGO unicode.DecodeRuneInString utf8.go:312:30|1 +// 7 -5 --- PGO internal/abi.Name.DataChecked type.go:625:22|0 inLoopAdj +// +// In the dump above, "Score" is the final score calculated for the +// callsite, "Adjustment" is the amount added to or subtracted from +// the original hairyness estimate to form the score. "Status" shows +// whether anything changed with the site -- did the adjustment bump +// it down just below the threshold ("PROMOTED") or instead bump it +// above the threshold ("DEMOTED"); this will be blank ("---") if no +// threshold was crossed as a result of the heuristics. Note that +// "Status" also shows whether PGO was involved. "Callee" is the name +// of the function called, "CallerPos" is the position of the +// callsite, and "ScoreFlags" is a digest of the specific properties +// we used to make adjustments to callsite score via heuristics. +func DumpInlCallSiteScores(profile *pgo.Profile, budgetCallback func(fn *ir.Func, profile *pgo.Profile) (int32, bool)) { + + fmt.Fprintf(os.Stdout, "# scores for package %s\n", types.LocalPkg.Path) + cstab := CallSiteTable() + + genstatus := func(cs *CallSite, prof *pgo.Profile) string { + hairyval := cs.Callee.Inl.Cost + bud, isPGO := budgetCallback(cs.Callee, prof) + score := int32(cs.Score) + st := "---" + + switch { + case hairyval <= bud && score <= bud: + // "Normal" inlined case: hairy val sufficiently low that + // it would have been inlined anyway without heuristics. + case hairyval > bud && score > bud: + // "Normal" not inlined case: hairy val sufficiently high + // and scoring didn't lower it. + case hairyval > bud && score <= bud: + // Promoted: we would not have inlined it before, but + // after score adjustment we decided to inline. + st = "PROMOTED" + case hairyval <= bud && score > bud: + // Demoted: we would have inlined it before, but after + // score adjustment we decided not to inline. + st = "DEMOTED" + } + if isPGO { + st += " PGO" + } + return st + } + + if base.Debug.DumpInlCallSiteScores != 0 { + sl := make([]*CallSite, 0, len(cstab)) + for _, v := range cstab { + sl = append(sl, v) + } + sort.Slice(sl, func(i, j int) bool { + if sl[i].Score != sl[j].Score { + return sl[i].Score < sl[j].Score + } + fni := ir.PkgFuncName(sl[i].Callee) + fnj := ir.PkgFuncName(sl[j].Callee) + if fni != fnj { + return fni < fnj + } + ecsi := EncodeCallSiteKey(sl[i]) + ecsj := EncodeCallSiteKey(sl[j]) + return ecsi < ecsj + }) + + mkname := func(fn *ir.Func) string { + var n string + if fn == nil || fn.Nname == nil { + return "" + } + if fn.Sym().Pkg == types.LocalPkg { + n = "·" + fn.Sym().Name + } else { + n = ir.PkgFuncName(fn) + } + // don't try to print super-long names + if len(n) <= 64 { + return n + } + return n[:32] + "..." + n[len(n)-32:len(n)] + } + + if len(sl) != 0 { + fmt.Fprintf(os.Stdout, "Score Adjustment Status Callee CallerPos Flags ScoreFlags\n") + } + for _, cs := range sl { + hairyval := cs.Callee.Inl.Cost + adj := int32(cs.Score) - hairyval + fmt.Fprintf(os.Stdout, "%d %d\t%s\t%s\t%s\t%s\n", + cs.Score, adj, genstatus(cs, profile), + mkname(cs.Callee), + EncodeCallSiteKey(cs), + cs.ScoreMask.String()) + } + } +} -- 2.50.0