From b63b0f2b75c71b9e80e42488aaa7ddf1a5da11ae Mon Sep 17 00:00:00 2001 From: Alberto Donizetti Date: Mon, 26 Mar 2018 12:38:29 +0200 Subject: [PATCH] cmd/compile: allocate less in regalloc's liveValues MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Instrumenting the compiler shows that, at the end of liveValues, the values of the workList's cap are distributed as: cap freq 1 0.006 2 0.002 4 0.237 8 0.272 16 0.254 32 0.141 64 0.062 128 0.02 256 0.005 512 0.001 1024 0.0 Since the initial workList slice allocation is always on the stack (as the variable does not escape), we can aggressively pre-allocate a big backing array at (almost) no cost. This will save several allocations in liveValues calls that end up having a large workList, with no performance penalties for calls that have a small workList. name old time/op new time/op delta Template 284ms ± 3% 282ms ± 3% ~ (p=0.201 n=20+20) Unicode 138ms ± 7% 138ms ± 7% ~ (p=0.718 n=20+20) GoTypes 905ms ± 2% 895ms ± 1% -1.10% (p=0.003 n=19+18) Compiler 4.26s ± 1% 4.25s ± 1% -0.38% (p=0.038 n=20+19) SSA 9.85s ± 2% 9.80s ± 1% ~ (p=0.061 n=20+19) Flate 187ms ± 6% 186ms ± 5% ~ (p=0.289 n=20+20) GoParser 227ms ± 3% 225ms ± 3% ~ (p=0.072 n=20+20) Reflect 578ms ± 2% 575ms ± 2% ~ (p=0.059 n=18+20) Tar 263ms ± 2% 265ms ± 3% ~ (p=0.224 n=19+20) XML 323ms ± 3% 325ms ± 2% ~ (p=0.127 n=20+20) name old user-time/op new user-time/op delta Template 406ms ± 6% 404ms ± 4% ~ (p=0.314 n=20+20) Unicode 220ms ± 6% 215ms ±11% ~ (p=0.077 n=18+20) GoTypes 1.25s ± 3% 1.24s ± 4% ~ (p=0.461 n=20+20) Compiler 5.95s ± 2% 5.84s ± 5% -1.93% (p=0.007 n=20+20) SSA 14.4s ± 4% 14.2s ± 4% ~ (p=0.108 n=20+20) Flate 257ms ± 6% 252ms ± 9% ~ (p=0.063 n=20+20) GoParser 317ms ± 5% 312ms ± 6% -1.85% (p=0.049 n=20+20) Reflect 779ms ± 2% 774ms ± 3% ~ (p=0.253 n=20+20) Tar 371ms ± 4% 374ms ± 4% ~ (p=0.327 n=20+20) XML 440ms ± 5% 442ms ± 5% ~ (p=0.678 n=20+20) name old alloc/op new alloc/op delta Template 39.4MB ± 0% 39.0MB ± 0% -0.96% (p=0.000 n=20+20) Unicode 29.1MB ± 0% 29.0MB ± 0% -0.13% (p=0.000 n=20+20) GoTypes 117MB ± 0% 116MB ± 0% -0.88% (p=0.000 n=20+20) Compiler 502MB ± 0% 498MB ± 0% -0.77% (p=0.000 n=19+20) SSA 1.42GB ± 0% 1.40GB ± 0% -0.80% (p=0.000 n=20+20) Flate 25.3MB ± 0% 25.0MB ± 0% -1.10% (p=0.000 n=20+19) GoParser 31.3MB ± 0% 31.0MB ± 0% -1.05% (p=0.000 n=20+20) Reflect 77.9MB ± 0% 77.1MB ± 0% -1.03% (p=0.000 n=20+20) Tar 40.0MB ± 0% 39.7MB ± 0% -0.80% (p=0.000 n=20+20) XML 45.2MB ± 0% 44.9MB ± 0% -0.72% (p=0.000 n=20+20) name old allocs/op new allocs/op delta Template 392k ± 0% 386k ± 0% -1.44% (p=0.000 n=20+20) Unicode 337k ± 0% 337k ± 0% -0.22% (p=0.000 n=20+20) GoTypes 1.22M ± 0% 1.20M ± 0% -1.33% (p=0.000 n=20+20) Compiler 4.76M ± 0% 4.71M ± 0% -1.12% (p=0.000 n=20+20) SSA 11.8M ± 0% 11.7M ± 0% -1.00% (p=0.000 n=20+20) Flate 241k ± 0% 238k ± 0% -1.49% (p=0.000 n=20+20) GoParser 324k ± 0% 320k ± 0% -1.17% (p=0.000 n=20+20) Reflect 981k ± 0% 961k ± 0% -2.11% (p=0.000 n=20+20) Tar 402k ± 0% 397k ± 0% -1.29% (p=0.000 n=20+20) XML 424k ± 0% 419k ± 0% -1.10% (p=0.000 n=19+20) Change-Id: If46667ae98eee2d47a615cad05e18df0629d8388 Reviewed-on: https://go-review.googlesource.com/102495 Run-TryBot: Alberto Donizetti TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/deadcode.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cmd/compile/internal/ssa/deadcode.go b/src/cmd/compile/internal/ssa/deadcode.go index 4e22c965fe..47690f879e 100644 --- a/src/cmd/compile/internal/ssa/deadcode.go +++ b/src/cmd/compile/internal/ssa/deadcode.go @@ -54,7 +54,7 @@ func liveValues(f *Func, reachable []bool) []bool { } // Find all live values - var q []*Value // stack-like worklist of unscanned values + q := make([]*Value, 0, 64) // stack-like worklist of unscanned values // Starting set: all control values of reachable blocks are live. // Calls are live (because callee can observe the memory state). -- 2.48.1