// The -d option takes a comma-separated list of settings.
// Each setting is name=value; for ints, name is short for name=1.
type DebugFlags struct {
+ AlignHot int `help:"enable hot block alignment (currently requires -pgo)" concurrent:"ok"`
Append int `help:"print information about append compilation"`
Checkptr int `help:"instrument unsafe pointer conversions\n0: instrumentation disabled\n1: conversions involving unsafe.Pointer are instrumented\n2: conversions to unsafe.Pointer force heap allocation" concurrent:"ok"`
Closure int `help:"print information about closure compilation"`
Debug.ConcurrentOk = true
Debug.MaxShapeLen = 500
+ Debug.AlignHot = 1
Debug.InlFuncsWithClosures = 1
Debug.InlStaticInit = 1
Debug.PGOInline = 1
"cmd/compile/internal/ir"
"cmd/compile/internal/liveness"
"cmd/compile/internal/objw"
+ "cmd/compile/internal/pgoir"
"cmd/compile/internal/ssagen"
"cmd/compile/internal/staticinit"
"cmd/compile/internal/types"
// compileFunctions compiles all functions in compilequeue.
// It fans out nBackendWorkers to do the work
// and waits for them to complete.
-func compileFunctions() {
+func compileFunctions(profile *pgoir.Profile) {
if race.Enabled {
// Randomize compilation order to try to shake out races.
tmp := make([]*ir.Func, len(compilequeue))
for _, fn := range fns {
fn := fn
queue(func(worker int) {
- ssagen.Compile(fn, worker)
+ ssagen.Compile(fn, worker, profile)
compile(fn.Closures)
wg.Done()
})
// as late as possible to maximize how much work we can batch and
// process concurrently.
if len(compilequeue) != 0 {
- compileFunctions()
+ compileFunctions(profile)
continue
}
// TODO(prattmic): Make this non-global.
candHotCalleeMap = make(map[*pgoir.IRNode]struct{})
+ // Set of functions that contain hot call sites.
+ hasHotCall = make(map[*ir.Func]struct{})
+
// List of all hot call sites. CallSiteInfo.Callee is always nil.
// TODO(prattmic): Make this non-global.
candHotEdgeMap = make(map[pgoir.CallSiteInfo]struct{})
inlineHotMaxBudget int32 = 2000
)
+// IsPgoHotFunc reports whether fn is one of the hot callees identified from profile.
+func IsPgoHotFunc(fn *ir.Func, profile *pgoir.Profile) bool {
+ if profile == nil {
+ return false
+ }
+ if n, ok := profile.WeightedCG.IRNodes[ir.LinkFuncName(fn)]; ok {
+ _, ok := candHotCalleeMap[n]
+ return ok
+ }
+ return false
+}
+
+// HasPgoHotInline reports whether a PGO-hot call site has been inlined into fn.
+func HasPgoHotInline(fn *ir.Func) bool {
+ _, has := hasHotCall[fn]
+ return has
+}
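+
+// For example (illustrative; this mirrors the ssagen call site further below),
+// the backend treats a function as hot for alignment purposes if either the
+// function itself is PGO-hot or a PGO-hot call site was inlined into it:
+//
+//	hot := IsPgoHotFunc(fn, profile) || HasPgoHotInline(fn)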
+
// PGOInlinePrologue records the hot callsites from ir-graph.
func PGOInlinePrologue(p *pgoir.Profile) {
if base.Debug.PGOInlineCDFThreshold != "" {
func inlineBudget(fn *ir.Func, profile *pgoir.Profile, relaxed bool, verbose bool) int32 {
// Update the budget for profile-guided inlining.
budget := int32(inlineMaxBudget)
- if profile != nil {
- if n, ok := profile.WeightedCG.IRNodes[ir.LinkFuncName(fn)]; ok {
- if _, ok := candHotCalleeMap[n]; ok {
- budget = inlineHotMaxBudget
- if verbose {
- fmt.Printf("hot-node enabled increased budget=%v for func=%v\n", budget, ir.PkgFuncName(fn))
- }
- }
+ if IsPgoHotFunc(fn, profile) {
+ budget = inlineHotMaxBudget
+ if verbose {
+ fmt.Printf("hot-node enabled increased budget=%v for func=%v\n", budget, ir.PkgFuncName(fn))
}
}
if relaxed {
// Check whether we'd actually inline this call. Set
// log == false since we aren't actually doing inlining
// yet.
- if ok, _ := canInlineCallExpr(v.curFunc, n, callee, v.isBigFunc, false); ok {
+ if ok, _, _ := canInlineCallExpr(v.curFunc, n, callee, v.isBigFunc, false); ok {
// mkinlcall would inline this call [1], so use
// the cost of the inline body as the cost of
// the call, as that is what will actually
// inlineCostOK returns true if call n from caller to callee is cheap enough to
// inline. bigCaller indicates that caller is a big function.
//
-// In addition to the "cost OK" boolean, it also returns the "max
-// cost" limit used to make the decision (which may differ depending
-// on func size), and the score assigned to this specific callsite.
-func inlineCostOK(n *ir.CallExpr, caller, callee *ir.Func, bigCaller bool) (bool, int32, int32) {
+// In addition to the "cost OK" boolean, it also returns
+// - the "max cost" limit used to make the decision (which may differ depending on func size)
+// - the score assigned to this specific callsite
+// - whether this call site is considered "hot" according to PGO.
+func inlineCostOK(n *ir.CallExpr, caller, callee *ir.Func, bigCaller bool) (bool, int32, int32, bool) {
maxCost := int32(inlineMaxBudget)
if bigCaller {
// We use this to restrict inlining into very big functions.
}
}
+ lineOffset := pgoir.NodeLineOffset(n, caller)
+ csi := pgoir.CallSiteInfo{LineOffset: lineOffset, Caller: caller}
+ _, hot := candHotEdgeMap[csi]
+
if metric <= maxCost {
// Simple case. Function is already cheap enough.
- return true, 0, metric
+ return true, 0, metric, hot
}
// We'll also allow inlining of hot functions below inlineHotMaxBudget,
// but only in small functions.
- lineOffset := pgoir.NodeLineOffset(n, caller)
- csi := pgoir.CallSiteInfo{LineOffset: lineOffset, Caller: caller}
- if _, ok := candHotEdgeMap[csi]; !ok {
+ if !hot {
// Cold
- return false, maxCost, metric
+ return false, maxCost, metric, false
}
// Hot
if base.Debug.PGODebug > 0 {
fmt.Printf("hot-big check disallows inlining for call %s (cost %d) at %v in big function %s\n", ir.PkgFuncName(callee), callee.Inl.Cost, ir.Line(n), ir.PkgFuncName(caller))
}
- return false, maxCost, metric
+ return false, maxCost, metric, false
}
if metric > inlineHotMaxBudget {
- return false, inlineHotMaxBudget, metric
+ return false, inlineHotMaxBudget, metric, false
}
if !base.PGOHash.MatchPosWithInfo(n.Pos(), "inline", nil) {
// De-selected by PGO Hash.
- return false, maxCost, metric
+ return false, maxCost, metric, false
}
if base.Debug.PGODebug > 0 {
fmt.Printf("hot-budget check allows inlining for call %s (cost %d) at %v in function %s\n", ir.PkgFuncName(callee), callee.Inl.Cost, ir.Line(n), ir.PkgFuncName(caller))
}
- return true, 0, metric
+ return true, 0, metric, hot
}
// canInlineCallExpr returns true if the call n from caller to callee
-// can be inlined, plus the score computed for the call expr in
-// question. bigCaller indicates that caller is a big function. log
+// can be inlined, plus the score computed for the call expr in question,
+// and whether the call site is considered hot according to PGO.
+// bigCaller indicates that caller is a big function. log
// indicates that the 'cannot inline' reason should be logged.
//
// Preconditions: CanInline(callee) has already been called.
-func canInlineCallExpr(callerfn *ir.Func, n *ir.CallExpr, callee *ir.Func, bigCaller bool, log bool) (bool, int32) {
+func canInlineCallExpr(callerfn *ir.Func, n *ir.CallExpr, callee *ir.Func, bigCaller bool, log bool) (bool, int32, bool) {
if callee.Inl == nil {
// callee is never inlinable.
if log && logopt.Enabled() {
logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
fmt.Sprintf("%s cannot be inlined", ir.PkgFuncName(callee)))
}
- return false, 0
+ return false, 0, false
}
- ok, maxCost, callSiteScore := inlineCostOK(n, callerfn, callee, bigCaller)
+ ok, maxCost, callSiteScore, hot := inlineCostOK(n, callerfn, callee, bigCaller)
if !ok {
// callee cost too high for this call site.
if log && logopt.Enabled() {
logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
fmt.Sprintf("cost %d of %s exceeds max caller cost %d", callee.Inl.Cost, ir.PkgFuncName(callee), maxCost))
}
- return false, 0
+ return false, 0, false
}
if callee == callerfn {
if log && logopt.Enabled() {
logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", fmt.Sprintf("recursive call to %s", ir.FuncName(callerfn)))
}
- return false, 0
+ return false, 0, false
}
if base.Flag.Cfg.Instrumenting && types.IsNoInstrumentPkg(callee.Sym().Pkg) {
logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
fmt.Sprintf("call to runtime function %s in instrumented build", ir.PkgFuncName(callee)))
}
- return false, 0
+ return false, 0, false
}
if base.Flag.Race && types.IsNoRacePkg(callee.Sym().Pkg) {
logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
fmt.Sprintf(`call into "no-race" package function %s in race build`, ir.PkgFuncName(callee)))
}
- return false, 0
+ return false, 0, false
}
// Check if we've already inlined this function at this particular
fmt.Sprintf("repeated recursive cycle to %s", ir.PkgFuncName(callee)))
}
}
- return false, 0
+ return false, 0, false
}
}
- return true, callSiteScore
+ return true, callSiteScore, hot
}
// mkinlcall returns an OINLCALL node that can replace OCALLFUNC n, or nil if the call cannot be inlined.
//
// n.Left = mkinlcall(n.Left, fn, isddd)
func mkinlcall(callerfn *ir.Func, n *ir.CallExpr, fn *ir.Func, bigCaller bool) *ir.InlinedCallExpr {
- ok, score := canInlineCallExpr(callerfn, n, fn, bigCaller, true)
+ ok, score, hot := canInlineCallExpr(callerfn, n, fn, bigCaller, true)
if !ok {
return nil
}
+ if hot {
+ hasHotCall[callerfn] = struct{}{}
+ }
typecheck.AssertFixedCall(n)
parent := base.Ctxt.PosTable.Pos(n.Pos()).Base().InliningIndex()
// After flagalloc, records whether flags are live at the end of the block.
FlagsLiveAtEnd bool
+ // A block that would be good to align (according to the optimizer's guesses)
+ Hotness Hotness
+
// Subsequent blocks, if any. The number and order depend on the block kind.
Succs []Edge
}
// BlockKind is the kind of SSA block.
-type BlockKind int16
+type BlockKind uint8
// short form print
func (b *Block) String() string {
BranchUnknown = BranchPrediction(0)
BranchLikely = BranchPrediction(+1)
)
+
+type Hotness int8 // Could use negative numbers for specifically non-hot blocks, but don't, yet.
+const (
+ // These values are arranged in what seems to be order of increasing alignment importance.
+ // Currently only a few are relevant. Implicitly, they are all in a loop.
+ HotNotFlowIn Hotness = 1 << iota // This block is only reached by branches
+ HotInitial // In the block order, the first one for a given loop. Not necessarily topological header.
+ HotPgo // By PGO-based heuristics, this block occurs in a hot loop
+
+ HotNot = 0
+ HotInitialNotFlowIn = HotInitial | HotNotFlowIn // typically first block of a rotated loop, loop is entered with a branch (not to this block). No PGO
+ HotPgoInitial = HotPgo | HotInitial // special case; single-block loop, the initial block is the header block and has a flow-in entry, but PGO says it is hot
+ HotPgoInitialNotFLowIn = HotPgo | HotInitial | HotNotFlowIn // PGO says it is hot, and the loop is rotated so flow enters loop with a branch
+)
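+
+// Illustrative example: with PGO, the first block of a rotated loop (reached
+// only by branches) carries HotPgo|HotInitial|HotNotFlowIn, while a hot
+// single-block loop whose header also has a flow-in entry carries only
+// HotPgo|HotInitial. Consumers test these combinations with masks, e.g.
+//
+//	if b.Hotness&HotPgoInitial == HotPgoInitial { /* candidate for alignment */ }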
laidout bool // Blocks are ordered
NoSplit bool // true if function is marked as nosplit. Used by schedule check pass.
dumpFileSeq uint8 // the sequence numbers of dump file. (%s_%02d__%s.dump", funcname, dumpFileSeq, phaseName)
+ IsPgoHot bool // function is hot according to PGO, either directly or via an inlined hot call site
// when register allocation is done, maps value ids to locations
RegAlloc []Location
}
p = e.b
}
- if p == nil || p == b {
+ if p == nil {
continue
}
+ p.Hotness |= HotInitial
+ if f.IsPgoHot {
+ p.Hotness |= HotPgo
+ }
+ // blocks will be arranged so that p is ordered first, if it isn't already.
+ if p == b { // p is header, already first (and also, only block in the loop)
+ continue
+ }
+ p.Hotness |= HotNotFlowIn
+
+ // the loop header b follows p
after[p.ID] = []*Block{b}
for {
nextIdx := idToIdx[b.ID] + 1
"sync"
"cmd/compile/internal/base"
+ "cmd/compile/internal/inline"
"cmd/compile/internal/ir"
"cmd/compile/internal/liveness"
"cmd/compile/internal/objw"
+ "cmd/compile/internal/pgoir"
"cmd/compile/internal/ssa"
"cmd/compile/internal/types"
"cmd/internal/obj"
// uses it to generate a plist,
// and flushes that plist to machine code.
// worker indicates which of the backend workers is doing the processing.
-func Compile(fn *ir.Func, worker int) {
- f := buildssa(fn, worker)
+func Compile(fn *ir.Func, worker int, profile *pgoir.Profile) {
+ f := buildssa(fn, worker, inline.IsPgoHotFunc(fn, profile) || inline.HasPgoHotInline(fn))
// Note: check arg size to fix issue 25507.
if f.Frontend().(*ssafn).stksize >= maxStackSize || f.OwnAux.ArgWidth() >= maxStackSize {
largeStackFramesMu.Lock()
// buildssa builds an SSA function for fn.
// worker indicates which of the backend workers is doing the processing.
-func buildssa(fn *ir.Func, worker int) *ssa.Func {
+func buildssa(fn *ir.Func, worker int, isPgoHot bool) *ssa.Func {
name := ir.FuncName(fn)
abiSelf := abiForFunc(fn, ssaConfig.ABI0, ssaConfig.ABI1)
// Allocate starting block
s.f.Entry = s.f.NewBlock(ssa.BlockPlain)
s.f.Entry.Pos = fn.Pos()
+ s.f.IsPgoHot = isPgoHot
if printssa {
ssaDF := ssaDumpFile
var argLiveIdx int = -1 // argument liveness info index
+ // These control cache line alignment; if the required portion of
+ // a cache line is not available, then pad to obtain cache line
+ // alignment. Not implemented on all architectures, may not be
+ // useful on all architectures.
+ var hotAlign, hotRequire int64
+
+ if base.Debug.AlignHot > 0 {
+ switch base.Ctxt.Arch.Name {
+ // enable this on a case-by-case basis, with benchmarking.
+ // currently shown:
+ // good for amd64
+ // not helpful for Apple Silicon
+ //
+ case "amd64", "386":
+ // Align to 64 if 31 or fewer bytes remain before the next 64-byte boundary;
+ // this benchmarks a little better than always aligning, and also
+ // adds slightly less to the (PGO-compiled) binary size.
+ hotAlign = 64
+ hotRequire = 31
+ }
+ }
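+ // Illustrative note: with the amd64/386 values above (hotAlign=64, hotRequire=31),
+ // the PCALIGNMAX prog emitted for hot blocks below asks the assembler to pad to
+ // a 64-byte boundary only when 31 or fewer padding bytes are needed; the padding
+ // computation itself lives in obj.AlignmentPaddingLength.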
+
// Emit basic blocks
for i, b := range f.Blocks {
- s.bstart[b.ID] = s.pp.Next
+
s.lineRunStart = nil
s.SetPos(s.pp.Pos.WithNotStmt()) // It needs a non-empty Pos, but cannot be a statement boundary (yet).
+ if hotAlign > 0 && b.Hotness&ssa.HotPgoInitial == ssa.HotPgoInitial {
+ // So far this has only been shown profitable for PGO-hot loop headers.
+ // The Hotness values allow distinctions between initial blocks that are "hot" or not, and "flow-in" or not.
+ // Currently only the initial blocks of loops are tagged in this way;
+ // there are no blocks tagged "pgo-hot" that are not also tagged "initial".
+ // TODO more heuristics, more architectures.
+ p := s.pp.Prog(obj.APCALIGNMAX)
+ p.From.SetConst(hotAlign)
+ p.To.SetConst(hotRequire)
+ }
+
+ s.bstart[b.ID] = s.pp.Next
+
if idx, ok := argLiveBlockMap[b.ID]; ok && idx != argLiveIdx {
argLiveIdx = idx
p := s.pp.Prog(obj.APCDATA)
// going to emit anyway, and use those instructions instead of the
// inline marks.
for p := s.pp.Text; p != nil; p = p.Link {
- if p.As == obj.ANOP || p.As == obj.AFUNCDATA || p.As == obj.APCDATA || p.As == obj.ATEXT || p.As == obj.APCALIGN || Arch.LinkArch.Family == sys.Wasm {
+ if p.As == obj.ANOP || p.As == obj.AFUNCDATA || p.As == obj.APCDATA || p.As == obj.ATEXT ||
+ p.As == obj.APCALIGN || p.As == obj.APCALIGNMAX || Arch.LinkArch.Family == sys.Wasm {
// Don't use 0-sized instructions as inline marks, because we need
// to identify inline mark instructions by pc offset.
// (Some of these instructions are sometimes zero-sized, sometimes not.
{obj.ANOP, C_LCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // nop variants, see #40689
{obj.ANOP, C_ZREG, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0},
{obj.ANOP, C_VREG, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0},
- {obj.ADUFFZERO, C_NONE, C_NONE, C_NONE, C_SBRA, C_NONE, 5, 4, 0, 0, 0}, // same as AB/ABL
- {obj.ADUFFCOPY, C_NONE, C_NONE, C_NONE, C_SBRA, C_NONE, 5, 4, 0, 0, 0}, // same as AB/ABL
- {obj.APCALIGN, C_LCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // align code
+ {obj.ADUFFZERO, C_NONE, C_NONE, C_NONE, C_SBRA, C_NONE, 5, 4, 0, 0, 0}, // same as AB/ABL
+ {obj.ADUFFCOPY, C_NONE, C_NONE, C_NONE, C_SBRA, C_NONE, 5, 4, 0, 0, 0}, // same as AB/ABL
+ {obj.APCALIGN, C_LCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // align code
+ {obj.APCALIGNMAX, C_LCON, C_NONE, C_NONE, C_LCON, C_NONE, 0, 0, 0, 0, 0}, // align code, conditional
}
// Valid pstate field values, and value to use in instruction.
m = o.size(c.ctxt, p)
if m == 0 {
switch p.As {
- case obj.APCALIGN:
- alignedValue := p.From.Offset
- m = pcAlignPadLength(ctxt, pc, alignedValue)
- // Update the current text symbol alignment value.
- if int32(alignedValue) > cursym.Func().Align {
- cursym.Func().Align = int32(alignedValue)
- }
+ case obj.APCALIGN, obj.APCALIGNMAX:
+ m = obj.AlignmentPadding(int32(pc), p, ctxt, cursym)
break
case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
continue
if m == 0 {
switch p.As {
- case obj.APCALIGN:
- alignedValue := p.From.Offset
- m = pcAlignPadLength(ctxt, pc, alignedValue)
+ case obj.APCALIGN, obj.APCALIGNMAX:
+ m = obj.AlignmentPaddingLength(int32(pc), p, ctxt)
break
case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
continue
if sz > 4*len(out) {
log.Fatalf("out array in span7 is too small, need at least %d for %v", sz/4, p)
}
- if p.As == obj.APCALIGN {
- alignedValue := p.From.Offset
- v := pcAlignPadLength(c.ctxt, p.Pc, alignedValue)
+ if p.As == obj.APCALIGN || p.As == obj.APCALIGNMAX {
+ v := obj.AlignmentPaddingLength(int32(p.Pc), p, c.ctxt)
for i = 0; i < int(v/4); i++ {
// emit ANOOP instruction by the padding size
c.ctxt.Arch.ByteOrder.PutUint32(bp, OP_NOOP)
obj.AUNDEF,
obj.AFUNCDATA,
obj.APCALIGN,
+ obj.APCALIGNMAX,
obj.APCDATA,
obj.ADUFFZERO,
obj.ADUFFCOPY:
AJMP
ANOP
APCALIGN
+ APCALIGNMAX // currently x86, amd64 and arm64
APCDATA
ARET
AGETCALLERPC
import (
"bytes"
+ "cmd/internal/objabi"
"fmt"
"internal/abi"
"internal/buildcfg"
"JMP",
"NOP",
"PCALIGN",
+ "PCALIGNMAX",
"PCDATA",
"RET",
"GETCALLERPC",
}
return fmt.Sprintf("<%s>", a.Sym.ABI())
}
+
+// AlignmentPadding returns the number of bytes of padding to add to align code
+// as requested, and updates the minimum alignment for the function.
+// Alignment is restricted to powers of 2 between 8 and 2048 inclusive.
+//
+// pc: current offset in function, in bytes
+// p: a PCALIGN or PCALIGNMAX prog
+// ctxt: the context, for the current function
+// cursym: the current function being assembled
+func AlignmentPadding(pc int32, p *Prog, ctxt *Link, cursym *LSym) int {
+ v := AlignmentPaddingLength(pc, p, ctxt)
+ requireAlignment(p.From.Offset, ctxt, cursym)
+ return v
+}
+
+// AlignmentPaddingLength is the number of bytes to add to align code as requested.
+// Alignment is restricted to powers of 2 between 8 and 2048 inclusive.
+// This only computes the length; it does not update the current function's
+// required alignment (the function's symbol is not passed in).
+//
+// pc: current offset in function, in bytes
+// p: a PCALIGN or PCALIGNMAX prog
+// ctxt: the context, for current function
+// It returns the number of bytes of padding needed.
+func AlignmentPaddingLength(pc int32, p *Prog, ctxt *Link) int {
+ a := p.From.Offset
+ if !((a&(a-1) == 0) && 8 <= a && a <= 2048) {
+ ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", a)
+ return 0
+ }
+ pc64 := int64(pc)
+ lob := pc64 & (a - 1) // Low Order Bits -- if not zero, then not aligned
+ if p.As == APCALIGN {
+ if lob != 0 {
+ return int(a - lob)
+ }
+ return 0
+ }
+ // For PCALIGNMAX, emit at most s bytes of padding to obtain the requested
+ // alignment; if more than s bytes would be needed, emit no padding at all.
+ s := p.To.Offset
+ if s < 0 || s >= a {
+ ctxt.Diag("PCALIGNMAX 'amount' %d must be non-negative and smaller than the aligment %d\n", s, a)
+ return 0
+ }
+ if s >= a-lob {
+ return int(a - lob)
+ }
+ return 0
+}
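+
+// Worked example (illustrative): for PCALIGNMAX $64, $31 at pc = 0x46, lob is
+// 0x46&63 = 6, so 64-6 = 58 bytes of padding would be needed; that exceeds 31,
+// so no padding is emitted. At pc = 0x7a, lob is 58 and only 64-58 = 6 bytes
+// are needed, so 6 bytes of padding are emitted and code resumes at 0x80.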
+
+// requireAlignment ensures that the function is aligned enough to support
+// the required code alignment
+func requireAlignment(a int64, ctxt *Link, cursym *LSym) {
+ // TODO remove explicit knowledge about AIX.
+ if ctxt.Headtype != objabi.Haix && cursym.Func().Align < int32(a) {
+ cursym.Func().Align = int32(a)
+ }
+}
n int32 // Size of the pad
}
-// Padding bytes to add to align code as requested.
-// Alignment is restricted to powers of 2 between 8 and 2048 inclusive.
+// requireAlignment ensures that the function alignment is at
+// least as high as a, which should be a power of two
+// and between 8 and 2048, inclusive.
//
-// pc: current offset in function, in bytes
-// a: requested alignment, in bytes
-// cursym: current function being assembled
-// returns number of bytes of padding needed
-func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int {
+// The boolean result indicates whether the alignment meets those constraints.
+func requireAlignment(a int64, ctxt *obj.Link, cursym *obj.LSym) bool {
if !((a&(a-1) == 0) && 8 <= a && a <= 2048) {
ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", a)
- return 0
+ return false
}
-
// By default function alignment is 32 bytes for amd64
if cursym.Func().Align < int32(a) {
cursym.Func().Align = int32(a)
}
-
- if pc&(a-1) != 0 {
- return int(a - (pc & (a - 1)))
- }
-
- return 0
+ return true
}
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
c0 := c
c = pjc.padJump(ctxt, s, p, c)
- if p.As == obj.APCALIGN {
- aln := p.From.Offset
- v := addpad(int64(c), aln, ctxt, s)
+ if p.As == obj.APCALIGN || p.As == obj.APCALIGNMAX {
+ v := obj.AlignmentPadding(c, p, ctxt, s)
if v > 0 {
s.Grow(int64(c) + int64(v))
fillnop(s.P[c:], int(v))
}
-
+ p.Pc = int64(c)
c += int32(v)
pPrev = p
continue
+
}
if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {