From 7d16e44d4f36fb37f43dc5318fbe13a1ba50425d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Mart=C3=AD?= Date: Tue, 17 Sep 2019 14:39:54 +0100 Subject: [PATCH] cmd/compile: reduce the regexp work in rulegen MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit As correctly pointed out by Giovanni Bajo, doing a single regexp pass should be much faster than doing hundreds per architecture. We can then use a map to keep track of what ops are handled in each file. And the amount of saved work is evident: name old time/op new time/op delta Rulegen 2.48s ± 1% 2.02s ± 1% -18.44% (p=0.008 n=5+5) name old user-time/op new user-time/op delta Rulegen 10.9s ± 1% 8.9s ± 0% -18.27% (p=0.008 n=5+5) name old sys-time/op new sys-time/op delta Rulegen 209ms ±28% 236ms ±18% ~ (p=0.310 n=5+5) name old peak-RSS-bytes new peak-RSS-bytes delta Rulegen 178MB ± 3% 176MB ± 3% ~ (p=0.548 n=5+5) The speed-up is so large that we don't need to parallelize it anymore; the numbers above are with the removed goroutines. Adding them back in doesn't improve performance noticeably at all: name old time/op new time/op delta Rulegen 2.02s ± 1% 2.01s ± 1% ~ (p=0.421 n=5+5) name old user-time/op new user-time/op delta Rulegen 8.90s ± 0% 8.96s ± 1% ~ (p=0.095 n=5+5) While at it, remove an unused method. Change-Id: I328b56e63b64a9ab48147e67e7d5a385c795ec54 Reviewed-on: https://go-review.googlesource.com/c/go/+/195739 Run-TryBot: Daniel Martí TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/gen/main.go | 41 +++++++++++---------- src/cmd/compile/internal/ssa/gen/rulegen.go | 1 - 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/cmd/compile/internal/ssa/gen/main.go b/src/cmd/compile/internal/ssa/gen/main.go index 783ac1bd30..db40057743 100644 --- a/src/cmd/compile/internal/ssa/gen/main.go +++ b/src/cmd/compile/internal/ssa/gen/main.go @@ -392,34 +392,35 @@ func genOp() { // Check that the arch genfile handles all the arch-specific opcodes. // This is very much a hack, but it is better than nothing. - var wg sync.WaitGroup + // + // Do a single regexp pass to record all ops being handled in a map, and + // then compare that with the ops list. This is much faster than one + // regexp pass per opcode. for _, a := range archs { if a.genfile == "" { continue } - a := a - wg.Add(1) - go func() { - src, err := ioutil.ReadFile(a.genfile) - if err != nil { - log.Fatalf("can't read %s: %v", a.genfile, err) - } + pattern := fmt.Sprintf(`\Wssa\.Op%s([a-zA-Z0-9_]+)\W`, a.name) + rxOp, err := regexp.Compile(pattern) + if err != nil { + log.Fatalf("bad opcode regexp %s: %v", pattern, err) + } - for _, v := range a.ops { - pattern := fmt.Sprintf(`\Wssa\.Op%s%s\W`, a.name, v.name) - match, err := regexp.Match(pattern, src) - if err != nil { - log.Fatalf("bad opcode regexp %s: %v", pattern, err) - } - if !match { - log.Fatalf("Op%s%s has no code generation in %s", a.name, v.name, a.genfile) - } + src, err := ioutil.ReadFile(a.genfile) + if err != nil { + log.Fatalf("can't read %s: %v", a.genfile, err) + } + seen := make(map[string]bool, len(a.ops)) + for _, m := range rxOp.FindAllSubmatch(src, -1) { + seen[string(m[1])] = true + } + for _, op := range a.ops { + if !seen[op.name] { + log.Fatalf("Op%s%s has no code generation in %s", a.name, op.name, a.genfile) } - wg.Done() - }() + } } - wg.Wait() } // Name returns the name of the architecture for use in Op* and Block* enumerations. diff --git a/src/cmd/compile/internal/ssa/gen/rulegen.go b/src/cmd/compile/internal/ssa/gen/rulegen.go index ed3ed75638..215f051370 100644 --- a/src/cmd/compile/internal/ssa/gen/rulegen.go +++ b/src/cmd/compile/internal/ssa/gen/rulegen.go @@ -636,7 +636,6 @@ type bodyBase struct { canFail bool } -func (w *bodyBase) body() []Statement { return w.list } func (w *bodyBase) add(nodes ...Statement) { w.list = append(w.list, nodes...) for _, node := range nodes { -- 2.50.0