From 322cd4e8078efc3c903aab2ab6fd1e0f200b282b Mon Sep 17 00:00:00 2001 From: Than McIntosh Date: Wed, 1 Nov 2023 14:20:05 -0400 Subject: [PATCH] cmd/compile/internal/inline: refactor call site scoring Rework the call site scoring process to relocate the code that looks for interesting actual expressions at callsites (e.g. passing a constant, passing a function pointer, etc) back into the original callsite analysis phase, as opposed to trying to do the analysis at scoring time. No changes to heuristics functionality; this doesn't have much benefit here, but will make it easier later on (in a future ptahc) to reduce ir.StaticValue calls. Change-Id: I0e946f9589310a405951cb41835a819d38158e45 Reviewed-on: https://go-review.googlesource.com/c/go/+/539317 LUCI-TryBot-Result: Go LUCI Reviewed-by: Matthew Dempsky --- .../inlheur/actualexprpropbits_string.go | 58 +++++++++++++++++ .../inline/inlheur/analyze_func_callsites.go | 64 +++++++++++++++---- .../internal/inline/inlheur/callsite.go | 23 +++++-- .../internal/inline/inlheur/scoring.go | 44 +++++++------ .../inline/inlheur/testdata/props/calls.go | 2 +- 5 files changed, 156 insertions(+), 35 deletions(-) create mode 100644 src/cmd/compile/internal/inline/inlheur/actualexprpropbits_string.go diff --git a/src/cmd/compile/internal/inline/inlheur/actualexprpropbits_string.go b/src/cmd/compile/internal/inline/inlheur/actualexprpropbits_string.go new file mode 100644 index 0000000000..2faf76f487 --- /dev/null +++ b/src/cmd/compile/internal/inline/inlheur/actualexprpropbits_string.go @@ -0,0 +1,58 @@ +// Code generated by "stringer -bitset -type ActualExprPropBits"; DO NOT EDIT. + +package inlheur + +import "strconv" +import "bytes" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[ActualExprConstant-1] + _ = x[ActualExprIsConcreteConvIface-2] + _ = x[ActualExprIsFunc-4] + _ = x[ActualExprIsInlinableFunc-8] +} + +var _ActualExprPropBits_value = [...]uint64{ + 0x1, /* ActualExprConstant */ + 0x2, /* ActualExprIsConcreteConvIface */ + 0x4, /* ActualExprIsFunc */ + 0x8, /* ActualExprIsInlinableFunc */ +} + +const _ActualExprPropBits_name = "ActualExprConstantActualExprIsConcreteConvIfaceActualExprIsFuncActualExprIsInlinableFunc" + +var _ActualExprPropBits_index = [...]uint8{0, 18, 47, 63, 88} + +func (i ActualExprPropBits) String() string { + var b bytes.Buffer + + remain := uint64(i) + seen := false + + for k, v := range _ActualExprPropBits_value { + x := _ActualExprPropBits_name[_ActualExprPropBits_index[k]:_ActualExprPropBits_index[k+1]] + if v == 0 { + if i == 0 { + b.WriteString(x) + return b.String() + } + continue + } + if (v & remain) == v { + remain &^= v + x := _ActualExprPropBits_name[_ActualExprPropBits_index[k]:_ActualExprPropBits_index[k+1]] + if seen { + b.WriteString("|") + } + seen = true + b.WriteString(x) + } + } + if remain == 0 { + return b.String() + } + return "ActualExprPropBits(0x" + strconv.FormatInt(int64(i), 16) + ")" +} diff --git a/src/cmd/compile/internal/inline/inlheur/analyze_func_callsites.go b/src/cmd/compile/internal/inline/inlheur/analyze_func_callsites.go index 9c8fac4e9e..3e285d5181 100644 --- a/src/cmd/compile/internal/inline/inlheur/analyze_func_callsites.go +++ b/src/cmd/compile/internal/inline/inlheur/analyze_func_callsites.go @@ -7,6 +7,7 @@ package inlheur import ( "cmd/compile/internal/ir" "cmd/compile/internal/pgo" + "cmd/compile/internal/typecheck" "fmt" "os" "strings" @@ -124,15 +125,58 @@ func (csa *callSiteAnalyzer) determinePanicPathBits(call ir.Node, r CSPropBits) return r } +// propsForArg returns property bits for a given call argument expression arg. +func (csa *callSiteAnalyzer) propsForArg(arg ir.Node) ActualExprPropBits { + _, islit := isLiteral(arg) + if islit { + return ActualExprConstant + } + if isConcreteConvIface(arg) { + return ActualExprIsConcreteConvIface + } + fname, isfunc, _ := isFuncName(arg) + if isfunc { + if fn := fname.Func; fn != nil && typecheck.HaveInlineBody(fn) { + return ActualExprIsInlinableFunc + } + return ActualExprIsFunc + } + return 0 +} + +// argPropsForCall returns a slice of argument properties for the +// expressions being passed to the callee in the specific call +// expression; these will be stored in the CallSite object for a given +// call and then consulted when scoring. If no arg has any interesting +// properties we try to save some space and return a nil slice. +func (csa *callSiteAnalyzer) argPropsForCall(ce *ir.CallExpr) []ActualExprPropBits { + rv := make([]ActualExprPropBits, len(ce.Args)) + somethingInteresting := false + for idx := range ce.Args { + argProp := csa.propsForArg(ce.Args[idx]) + somethingInteresting = somethingInteresting || (argProp != 0) + rv[idx] = argProp + } + if !somethingInteresting { + return nil + } + return rv +} + func (csa *callSiteAnalyzer) addCallSite(callee *ir.Func, call *ir.CallExpr) { flags := csa.flagsForNode(call) + argProps := csa.argPropsForCall(call) + if debugTrace&debugTraceCalls != 0 { + fmt.Fprintf(os.Stderr, "=-= props %+v for call %v\n", argProps, call) + } // FIXME: maybe bulk-allocate these? cs := &CallSite{ - Call: call, - Callee: callee, - Assign: csa.containingAssignment(call), - Flags: flags, - ID: uint(len(csa.cstab)), + Call: call, + Callee: callee, + Assign: csa.containingAssignment(call), + ArgProps: argProps, + Flags: flags, + ID: uint(len(csa.cstab)), } if _, ok := csa.cstab[call]; ok { fmt.Fprintf(os.Stderr, "*** cstab duplicate entry at: %s\n", @@ -140,12 +184,10 @@ func (csa *callSiteAnalyzer) addCallSite(callee *ir.Func, call *ir.CallExpr) { fmt.Fprintf(os.Stderr, "*** call: %+v\n", call) panic("bad") } - if callee.Inl != nil { - // Set initial score for callsite to the cost computed - // by CanInline; this score will be refined later based - // on heuristics. - cs.Score = int(callee.Inl.Cost) - } + // Set initial score for callsite to the cost computed + // by CanInline; this score will be refined later based + // on heuristics. + cs.Score = int(callee.Inl.Cost) if csa.cstab == nil { csa.cstab = make(CallSiteTab) diff --git a/src/cmd/compile/internal/inline/inlheur/callsite.go b/src/cmd/compile/internal/inline/inlheur/callsite.go index baa1c20dcf..f457dd439b 100644 --- a/src/cmd/compile/internal/inline/inlheur/callsite.go +++ b/src/cmd/compile/internal/inline/inlheur/callsite.go @@ -25,11 +25,13 @@ import ( // the site, and "ID" is a numeric ID for the site within its // containing function. type CallSite struct { - Callee *ir.Func - Call *ir.CallExpr - parent *CallSite - Assign ir.Node - Flags CSPropBits + Callee *ir.Func + Call *ir.CallExpr + parent *CallSite + Assign ir.Node + Flags CSPropBits + + ArgProps []ActualExprPropBits Score int ScoreMask scoreAdjustTyp ID uint @@ -43,6 +45,17 @@ type CallSite struct { // with many calls that share the same auto-generated pos. type CallSiteTab map[*ir.CallExpr]*CallSite +// ActualExprPropBits describes a property of an actual expression (value +// passed to some specific func argument at a call site). +type ActualExprPropBits uint8 + +const ( + ActualExprConstant ActualExprPropBits = 1 << iota + ActualExprIsConcreteConvIface + ActualExprIsFunc + ActualExprIsInlinableFunc +) + type CSPropBits uint32 const ( diff --git a/src/cmd/compile/internal/inline/inlheur/scoring.go b/src/cmd/compile/internal/inline/inlheur/scoring.go index 9c29952edc..8c1cfb8cf8 100644 --- a/src/cmd/compile/internal/inline/inlheur/scoring.go +++ b/src/cmd/compile/internal/inline/inlheur/scoring.go @@ -8,7 +8,6 @@ import ( "cmd/compile/internal/base" "cmd/compile/internal/ir" "cmd/compile/internal/pgo" - "cmd/compile/internal/typecheck" "cmd/compile/internal/types" "fmt" "os" @@ -187,8 +186,13 @@ func mustToMay(x scoreAdjustTyp) scoreAdjustTyp { // callee function is 'callee' and with previously computed call site // properties 'csflags', then computes a score for the callsite that // combines the size cost of the callee with heuristics based on -// previously parameter and function properties. -func computeCallSiteScore(callee *ir.Func, calleeProps *FuncProps, call ir.Node, csflags CSPropBits) (int, scoreAdjustTyp) { +// previously parameter and function properties, then stores the score +// and the adjustment mask in the appropriate fields in 'cs' +func (cs *CallSite) computeCallSiteScore(calleeProps *FuncProps) { + callee := cs.Callee + csflags := cs.Flags + call := cs.Call + // Start with the size-based score for the callee. score := int(callee.Inl.Cost) var tmask scoreAdjustTyp @@ -212,32 +216,36 @@ func computeCallSiteScore(callee *ir.Func, calleeProps *FuncProps, call ir.Node, } if calleeProps == nil { - return score, tmask + cs.Score, cs.ScoreMask = score, tmask + return } // Walk through the actual expressions being passed at the call. calleeRecvrParms := callee.Type().RecvParams() - ce := call.(*ir.CallExpr) - for idx := range ce.Args { + for idx := range call.Args { // ignore blanks if calleeRecvrParms[idx].Sym == nil || calleeRecvrParms[idx].Sym.IsBlank() { continue } - arg := ce.Args[idx] + arg := call.Args[idx] pflag := calleeProps.ParamFlags[idx] if debugTrace&debugTraceScoring != 0 { fmt.Fprintf(os.Stderr, "=-= arg %d of %d: val %v flags=%s\n", - idx, len(ce.Args), arg, pflag.String()) + idx, len(call.Args), arg, pflag.String()) + } + + if len(cs.ArgProps) == 0 { + continue } - _, islit := isLiteral(arg) - iscci := isConcreteConvIface(arg) - fname, isfunc, _ := isFuncName(arg) + argProps := cs.ArgProps[idx] + if debugTrace&debugTraceScoring != 0 { - fmt.Fprintf(os.Stderr, "=-= isLit=%v iscci=%v isfunc=%v for arg %v\n", islit, iscci, isfunc, arg) + fmt.Fprintf(os.Stderr, "=-= arg %d props %s value %v\n", + idx, argProps.String(), arg) } - if islit { + if argProps&ActualExprConstant != 0 { if pflag&ParamMayFeedIfOrSwitch != 0 { score, tmask = adjustScore(passConstToNestedIfAdj, score, tmask) } @@ -246,7 +254,7 @@ func computeCallSiteScore(callee *ir.Func, calleeProps *FuncProps, call ir.Node, } } - if iscci { + if argProps&ActualExprIsConcreteConvIface != 0 { // FIXME: ideally here it would be nice to make a // distinction between the inlinable case and the // non-inlinable case, but this is hard to do. Example: @@ -279,10 +287,10 @@ func computeCallSiteScore(callee *ir.Func, calleeProps *FuncProps, call ir.Node, } } - if isfunc { + if argProps&(ActualExprIsFunc|ActualExprIsInlinableFunc) != 0 { mayadj := passFuncToNestedIndCallAdj mustadj := passFuncToIndCallAdj - if fn := fname.Func; fn != nil && typecheck.HaveInlineBody(fn) { + if argProps&ActualExprIsInlinableFunc != 0 { mayadj = passInlinableFuncToNestedIndCallAdj mustadj = passInlinableFuncToIndCallAdj } @@ -295,7 +303,7 @@ func computeCallSiteScore(callee *ir.Func, calleeProps *FuncProps, call ir.Node, } } - return score, tmask + cs.Score, cs.ScoreMask = score, tmask } func adjustScore(typ scoreAdjustTyp, score int, mask scoreAdjustTyp) (int, scoreAdjustTyp) { @@ -501,7 +509,7 @@ func scoreCallsRegion(fn *ir.Func, region ir.Nodes, cstab CallSiteTab, doCallRes continue } } - cs.Score, cs.ScoreMask = computeCallSiteScore(cs.Callee, cprops, cs.Call, cs.Flags) + cs.computeCallSiteScore(cprops) if doCallResults { if debugTrace&debugTraceScoring != 0 { diff --git a/src/cmd/compile/internal/inline/inlheur/testdata/props/calls.go b/src/cmd/compile/internal/inline/inlheur/testdata/props/calls.go index b1499dbf24..4efc3c4003 100644 --- a/src/cmd/compile/internal/inline/inlheur/testdata/props/calls.go +++ b/src/cmd/compile/internal/inline/inlheur/testdata/props/calls.go @@ -183,7 +183,7 @@ func T_pass_noninlinable_func_to_param_feeding_nested_indirect_call(x int) int { // {"Flags":0,"ParamFlags":[0,0],"ResultFlags":[0]} // callsite: calls.go:209:14|0 flagstr "CallSiteOnPanicPath" flagval 2 score 42 mask 1 maskstr "panicPathAdj" // callsite: calls.go:210:15|1 flagstr "CallSiteOnPanicPath" flagval 2 score 42 mask 1 maskstr "panicPathAdj" -// callsite: calls.go:212:19|2 flagstr "" flagval 0 score 16 mask 512 maskstr "passInlinableFuncToIndCallAdj" +// callsite: calls.go:212:19|2 flagstr "" flagval 0 score 36 mask 128 maskstr "passFuncToIndCallAdj" // callsite: calls.go:212:19|calls.go:232:10|0 flagstr "" flagval 0 score 4 mask 0 maskstr "" // // -- 2.50.0