From ca5417b8e0c859aa5537247aed03316bfd3f5a66 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Mon, 21 Mar 2016 07:20:01 +1100 Subject: [PATCH] cmd/compile: reduce some SSA garbage MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit It's pretty hard to get reliable CPU numbers, even with 50 runs on an otherwise-idle physical Linux machine, but the garbage reduction numbers are nice. To get useful time/op numbers, I modified compilebench to report user CPU time instead of wall time: name old time/op new time/op delta Template 547ms ± 6% 557ms ± 5% +1.80% (p=0.001 n=49+49) Unicode 360ms ± 9% 365ms ± 6% ~ (p=0.094 n=50+45) GoTypes 1.84s ± 3% 1.82s ± 3% -1.50% (p=0.000 n=50+49) Compiler 9.19s ± 2% 9.02s ± 2% -1.87% (p=0.000 n=45+50) name old alloc/op new alloc/op delta Template 63.3MB ± 0% 59.1MB ± 0% -6.72% (p=0.000 n=50+50) Unicode 43.1MB ± 0% 42.9MB ± 0% -0.47% (p=0.000 n=50+49) GoTypes 220MB ± 0% 200MB ± 0% -9.00% (p=0.000 n=50+50) Compiler 1.00GB ± 0% 0.89GB ± 0% -10.09% (p=0.000 n=50+49) name old allocs/op new allocs/op delta Template 681k ± 0% 680k ± 0% -0.16% (p=0.000 n=50+48) Unicode 541k ± 0% 541k ± 0% -0.02% (p=0.011 n=48+50) GoTypes 2.08M ± 0% 2.08M ± 0% -0.19% (p=0.000 n=48+50) Compiler 9.24M ± 0% 9.23M ± 0% -0.11% (p=0.000 n=50+50) Change-Id: I1fac4ebf85a1783e3289c3ffb1ed365442837643 Reviewed-on: https://go-review.googlesource.com/20995 Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot Reviewed-by: Dave Cheney Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/config.go | 4 ++ src/cmd/compile/internal/ssa/stackalloc.go | 84 +++++++++++++++++++--- 2 files changed, 80 insertions(+), 8 deletions(-) diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go index d0de429f35..f090992b0d 100644 --- a/src/cmd/compile/internal/ssa/config.go +++ b/src/cmd/compile/internal/ssa/config.go @@ -36,6 +36,10 @@ type Config struct { values [2000]Value blocks [200]Block + // Reusable stackAllocState. + // See stackalloc.go's {new,put}StackAllocState. + stackAllocState *stackAllocState + domblockstore []ID // scratch space for computing dominators scrSparse []*sparseSet // scratch sparse sets to be re-used. } diff --git a/src/cmd/compile/internal/ssa/stackalloc.go b/src/cmd/compile/internal/ssa/stackalloc.go index b4d964c87f..253c83f163 100644 --- a/src/cmd/compile/internal/ssa/stackalloc.go +++ b/src/cmd/compile/internal/ssa/stackalloc.go @@ -9,10 +9,51 @@ package ssa import "fmt" type stackAllocState struct { - f *Func + f *Func + + // live is the output of stackalloc. + // live[b.id] = live values at the end of block b. + live [][]ID + + // The following slices are reused across multiple users + // of stackAllocState. values []stackValState - live [][]ID // live[b.id] = live values at the end of block b. interfere [][]ID // interfere[v.id] = values that interfere with v. + names []LocalSlot + slots []int + used []bool +} + +func newStackAllocState(f *Func) *stackAllocState { + s := f.Config.stackAllocState + if s == nil { + return new(stackAllocState) + } + if s.f != nil { + f.Config.Fatalf(0, "newStackAllocState called without previous free") + } + return s +} + +func putStackAllocState(s *stackAllocState) { + for i := range s.values { + s.values[i] = stackValState{} + } + for i := range s.interfere { + s.interfere[i] = nil + } + for i := range s.names { + s.names[i] = LocalSlot{} + } + for i := range s.slots { + s.slots[i] = 0 + } + for i := range s.used { + s.used[i] = false + } + s.f.Config.stackAllocState = s + s.f = nil + s.live = nil } type stackValState struct { @@ -29,8 +70,10 @@ func stackalloc(f *Func, spillLive [][]ID) [][]ID { fmt.Println("before stackalloc") fmt.Println(f.String()) } - var s stackAllocState + s := newStackAllocState(f) s.init(f, spillLive) + defer putStackAllocState(s) + s.stackalloc() return s.live } @@ -39,7 +82,11 @@ func (s *stackAllocState) init(f *Func, spillLive [][]ID) { s.f = f // Initialize value information. - s.values = make([]stackValState, f.NumValues()) + if n := f.NumValues(); cap(s.values) >= n { + s.values = s.values[:n] + } else { + s.values = make([]stackValState, n) + } for _, b := range f.Blocks { for _, v := range b.Values { s.values[v.ID].typ = v.Type @@ -66,7 +113,12 @@ func (s *stackAllocState) stackalloc() { // Build map from values to their names, if any. // A value may be associated with more than one name (e.g. after // the assignment i=j). This step picks one name per value arbitrarily. - names := make([]LocalSlot, f.NumValues()) + if n := f.NumValues(); cap(s.names) >= n { + s.names = s.names[:n] + } else { + s.names = make([]LocalSlot, n) + } + names := s.names for _, name := range f.Names { // Note: not "range f.NamedValues" above, because // that would be nondeterministic. @@ -96,13 +148,25 @@ func (s *stackAllocState) stackalloc() { // Each time we assign a stack slot to a value v, we remember // the slot we used via an index into locations[v.Type]. - slots := make([]int, f.NumValues()) + slots := s.slots + if n := f.NumValues(); cap(slots) >= n { + slots = slots[:n] + } else { + slots = make([]int, n) + s.slots = slots + } for i := f.NumValues() - 1; i >= 0; i-- { slots[i] = -1 } // Pick a stack slot for each value needing one. - used := make([]bool, f.NumValues()) + var used []bool + if n := f.NumValues(); cap(s.used) >= n { + used = s.used[:n] + } else { + used = make([]bool, n) + s.used = used + } for _, b := range f.Blocks { for _, v := range b.Values { if !s.values[v.ID].needSlot { @@ -270,7 +334,11 @@ func (f *Func) setHome(v *Value, loc Location) { func (s *stackAllocState) buildInterferenceGraph() { f := s.f - s.interfere = make([][]ID, f.NumValues()) + if n := f.NumValues(); cap(s.interfere) >= n { + s.interfere = s.interfere[:n] + } else { + s.interfere = make([][]ID, n) + } live := f.newSparseSet(f.NumValues()) defer f.retSparseSet(live) for _, b := range f.Blocks { -- 2.48.1