From 623ef2813579c9b52ba4a0335722df4d93566b74 Mon Sep 17 00:00:00 2001 From: Michael Matloob Date: Mon, 24 Nov 2025 18:33:30 -0500 Subject: [PATCH] cmd/go: limit total compile -c backend concurrency using a pool Previously we limited the value we passed in to compile -c (which set the number of SSA compile goroutines that run at one time) to 4. This CL allows the -c value to go up to GOMAXPROCS, while limiting the total number of backend SSA compile goroutines to still be less than the previous worst case of 4*GOMAXPROCS (actually four times the value of the -p flag, but the default is GOMAXPROCS). We do that by keeping a pool of tokens to represent the total number of SSA compile goroutines (with some buffer to allow us to run out of tokens and not exceed 4*GOMAXPROCS). Each time a compile requests a -c value, we'll hand out half of the remaining tokens (with the number handed otu capped at GOMAXPROCS) until we run out of tokens, in wich case we'll set -c to one. This leads to a speed up of 3-10% on the 16 core intel perf builder and 5-16% on the 88 core builder on the Sweet go-build benchmark. Change-Id: Ib1ec843fee57f0fb8d36a507162317276a6a6964 Reviewed-on: https://go-review.googlesource.com/c/go/+/724142 Reviewed-by: Alan Donovan LUCI-TryBot-Result: Go LUCI Reviewed-by: Michael Matloob --- src/cmd/go/internal/work/gc.go | 79 +++++++++++++++++++++++--------- src/cmd/go/internal/work/init.go | 1 + 2 files changed, 58 insertions(+), 22 deletions(-) diff --git a/src/cmd/go/internal/work/gc.go b/src/cmd/go/internal/work/gc.go index 71c36e80cb..bbb087b436 100644 --- a/src/cmd/go/internal/work/gc.go +++ b/src/cmd/go/internal/work/gc.go @@ -15,6 +15,7 @@ import ( "path/filepath" "runtime" "strings" + "sync" "cmd/go/internal/base" "cmd/go/internal/cfg" @@ -127,7 +128,9 @@ func (gcToolchain) gc(b *Builder, a *Action, archive string, importcfg, embedcfg gcflags = append(gcflags, fuzzInstrumentFlags()...) } // Add -c=N to use concurrent backend compilation, if possible. - if c := gcBackendConcurrency(gcflags); c > 1 { + c, release := compilerConcurrency() + defer release() + if c > 1 { defaultGcFlags = append(defaultGcFlags, fmt.Sprintf("-c=%d", c)) } @@ -177,8 +180,9 @@ func (gcToolchain) gc(b *Builder, a *Action, archive string, importcfg, embedcfg return ofile, output, err } -// gcBackendConcurrency returns the backend compiler concurrency level for a package compilation. -func gcBackendConcurrency(gcflags []string) int { +// compilerConcurrency returns the compiler concurrency level for a package compilation. +// The returned function must be called after the compile finishes. +func compilerConcurrency() (int, func()) { // First, check whether we can use -c at all for this compilation. canDashC := concurrentGCBackendCompilationEnabledByDefault @@ -199,7 +203,7 @@ func gcBackendConcurrency(gcflags []string) int { } if !canDashC { - return 1 + return 1, func() {} } // Decide how many concurrent backend compilations to allow. @@ -212,29 +216,60 @@ func gcBackendConcurrency(gcflags []string) int { // of the overall compiler execution, so c==1 for much of the build. // So don't worry too much about that interaction for now. // - // However, in practice, setting c above 4 tends not to help very much. - // See the analysis in CL 41192. + // But to keep things reasonable, we maintain a cap on the total number of + // concurrent backend compiles. (If we gave each compile action the full GOMAXPROCS, we could + // potentially have GOMAXPROCS^2 running compile goroutines) In the past, we'd limit + // the number of concurrent backend compiles per process to 4, which would result in a worst-case number + // of backend compiles of 4*cfg.BuildP. Because some compile processes benefit from having + // a larger number of compiles, especially when the compile action is the only + // action running, we'll allow the max value to be larger, but ensure that the + // total number of backend compiles never exceeds that previous worst-case number. + // This is implemented using a pool of tokens that are given out. We'll set aside enough + // tokens to make sure we don't run out, and then give half of the remaining tokens (up to + // GOMAXPROCS) to each compile action that requests it. // - // TODO(josharian): attempt to detect whether this particular compilation - // is likely to be a bottleneck, e.g. when: - // - it has no successor packages to compile (usually package main) - // - all paths through the build graph pass through it - // - critical path scheduling says it is high priority - // and in such a case, set c to runtime.GOMAXPROCS(0). - // By default this is the same as runtime.NumCPU. - // We do this now when p==1. - // To limit parallelism, set GOMAXPROCS below numCPU; this may be useful + // As a user, to limit parallelism, set GOMAXPROCS below numCPU; this may be useful // on a low-memory builder, or if a deterministic build order is required. - c := runtime.GOMAXPROCS(0) if cfg.BuildP == 1 { // No process parallelism, do not cap compiler parallelism. - return c + return maxCompilerConcurrency, func() {} + } + + // Cap compiler parallelism using the pool. + tokensMu.Lock() + defer tokensMu.Unlock() + concurrentProcesses++ + // Set aside tokens so that we don't run out if we were running cfg.BuildP concurrent compiles. + // We'll set aside one token for each of the action goroutines that aren't currently running a compile. + setAside := cfg.BuildP - concurrentProcesses + availableTokens := tokens - setAside + // Grab half the remaining tokens: but with a floor of at least 1 token, and + // a ceiling of the max backend concurrency. + c := max(min(availableTokens/2, maxCompilerConcurrency), 1) + tokens -= c + // Successfully grabbed the tokens. + return c, func() { + tokensMu.Lock() + defer tokensMu.Unlock() + tokens += c } - // Some process parallelism. Set c to min(4, maxprocs). - if c > 4 { - c = 4 - } - return c +} + +var maxCompilerConcurrency = runtime.GOMAXPROCS(0) // max value we will use for -c + +var ( + tokensMu sync.Mutex + tokens int // number of available tokens + concurrentProcesses int // number of currently running compiles +) + +// initCompilerConcurrencyPool sets the number of tokens in the pool. It needs +// to be run after init, so that it can use the value of cfg.BuildP. +func initCompilerConcurrencyPool() { + // Size the pool so that the worst case total number of compiles is not more + // than what it was when we capped the concurrency to 4. + oldConcurrencyCap := min(4, maxCompilerConcurrency) + tokens = oldConcurrencyCap * cfg.BuildP } // trimpath returns the -trimpath argument to use diff --git a/src/cmd/go/internal/work/init.go b/src/cmd/go/internal/work/init.go index a2954ab91a..964d6e8363 100644 --- a/src/cmd/go/internal/work/init.go +++ b/src/cmd/go/internal/work/init.go @@ -60,6 +60,7 @@ func BuildInit(loaderstate *modload.State) { modload.Init(loaderstate) instrumentInit() buildModeInit() + initCompilerConcurrencyPool() cfgChangedEnv = makeCfgChangedEnv() if err := fsys.Init(); err != nil { -- 2.52.0