From b96189d1a046f7a70a632bd02106bd15e096dfa1 Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Tue, 23 Feb 2016 17:52:17 -0600 Subject: [PATCH] [dev.ssa] cmd/compile: speed up cse MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Construct better initial partitions by recursively comparing values and their arguments. This saves one second on compile of arithConst_ssa.go (4.3s to 3.3s) and shows a 3-5% increase with compilebench. name old time/op new time/op delta Template 266ms ± 3% 253ms ± 4% -5.08% (p=0.032 n=5+5) GoTypes 927ms ± 3% 885ms ± 2% -4.55% (p=0.016 n=5+5) Compiler 3.91s ± 3% 3.73s ± 2% -4.49% (p=0.008 n=5+5) MakeBash 31.6s ± 1% 30.5s ± 3% -3.51% (p=0.016 n=5+5) Change-Id: I6ede31ff459131ccfed69531acfbd06b19837700 Reviewed-on: https://go-review.googlesource.com/19838 Reviewed-by: David Chase --- src/cmd/compile/internal/ssa/cse.go | 161 ++++++++++----------------- src/cmd/compile/internal/ssa/type.go | 3 + 2 files changed, 62 insertions(+), 102 deletions(-) diff --git a/src/cmd/compile/internal/ssa/cse.go b/src/cmd/compile/internal/ssa/cse.go index 44bd87683d..f7958542aa 100644 --- a/src/cmd/compile/internal/ssa/cse.go +++ b/src/cmd/compile/internal/ssa/cse.go @@ -9,6 +9,10 @@ import ( "sort" ) +const ( + cmpDepth = 4 +) + // cse does common-subexpression elimination on the Function. // Values are just relinked, nothing is deleted. A subsequent deadcode // pass is required to actually remove duplicate expressions. @@ -30,8 +34,12 @@ func cse(f *Func) { // Make initial coarse partitions by using a subset of the conditions above. a := make([]*Value, 0, f.NumValues()) + auxIDs := auxmap{} for _, b := range f.Blocks { for _, v := range b.Values { + if auxIDs[v.Aux] == 0 { + auxIDs[v.Aux] = int32(len(auxIDs)) + 1 + } if v.Type.IsMemory() { continue // memory values can never cse } @@ -42,7 +50,7 @@ func cse(f *Func) { a = append(a, v) } } - partition := partitionValues(a) + partition := partitionValues(a, auxIDs) // map from value id back to eqclass id valueEqClass := make([]ID, f.NumValues()) @@ -202,8 +210,7 @@ type eqclass []*Value // being a sorted by ID list of *Values. The eqclass slices are // backed by the same storage as the input slice. // Equivalence classes of size 1 are ignored. -func partitionValues(a []*Value) []eqclass { - auxIDs := map[interface{}]int32{} +func partitionValues(a []*Value, auxIDs auxmap) []eqclass { sort.Sort(sortvalues{a, auxIDs}) var partition []eqclass @@ -212,30 +219,7 @@ func partitionValues(a []*Value) []eqclass { j := 1 for ; j < len(a); j++ { w := a[j] - rootsDiffer := v.Op != w.Op || - v.AuxInt != w.AuxInt || - len(v.Args) != len(w.Args) || - v.Op == OpPhi && v.Block != w.Block || - v.Aux != w.Aux - if rootsDiffer || - len(v.Args) >= 1 && (v.Args[0].Op != w.Args[0].Op || - v.Args[0].AuxInt != w.Args[0].AuxInt) || - len(v.Args) >= 2 && (v.Args[1].Op != w.Args[1].Op || - v.Args[1].AuxInt != w.Args[1].AuxInt) || - v.Type.Compare(w.Type) != CMPeq { - if Debug > 3 { - fmt.Printf("CSE.partitionValues separates %s from %s, AuxInt=%v, Aux=%v, Type.compare=%v", - v.LongString(), w.LongString(), v.AuxInt != w.AuxInt, v.Aux != w.Aux, v.Type.Compare(w.Type)) - if !rootsDiffer { - if len(v.Args) >= 1 { - fmt.Printf(", a0Op=%v, a0AuxInt=%v", v.Args[0].Op != w.Args[0].Op, v.Args[0].AuxInt != w.Args[0].AuxInt) - if len(v.Args) >= 2 { - fmt.Printf(", a1Op=%v, a1AuxInt=%v", v.Args[1].Op != w.Args[1].Op, v.Args[1].AuxInt != w.Args[1].AuxInt) - } - } - } - fmt.Printf("\n") - } + if cmpVal(v, w, auxIDs, cmpDepth) != CMPeq { break } } @@ -247,100 +231,73 @@ func partitionValues(a []*Value) []eqclass { return partition } - -// Sort values to make the initial partition. -type sortvalues struct { - a []*Value // array of values - auxIDs map[interface{}]int32 // aux -> aux ID map +func lt2Cmp(isLt bool) Cmp { + if isLt { + return CMPlt + } + return CMPgt } -func (sv sortvalues) Len() int { return len(sv.a) } -func (sv sortvalues) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] } -func (sv sortvalues) Less(i, j int) bool { - v := sv.a[i] - w := sv.a[j] +type auxmap map[interface{}]int32 + +func cmpVal(v, w *Value, auxIDs auxmap, depth int) Cmp { + // Try to order these comparison by cost (cheaper first) if v.Op != w.Op { - return v.Op < w.Op + return lt2Cmp(v.Op < w.Op) } if v.AuxInt != w.AuxInt { - return v.AuxInt < w.AuxInt - } - if v.Aux == nil && w.Aux != nil { // cheap aux check - expensive one below. - return true - } - if v.Aux != nil && w.Aux == nil { - return false + return lt2Cmp(v.AuxInt < w.AuxInt) } if len(v.Args) != len(w.Args) { - return len(v.Args) < len(w.Args) + return lt2Cmp(len(v.Args) < len(w.Args)) } - if v.Op == OpPhi && v.Block.ID != w.Block.ID { - return v.Block.ID < w.Block.ID + if v.Op == OpPhi && v.Block != w.Block { + return lt2Cmp(v.Block.ID < w.Block.ID) } - if len(v.Args) >= 1 { - vOp := v.Args[0].Op - wOp := w.Args[0].Op - if vOp != wOp { - return vOp < wOp - } - vAuxInt := v.Args[0].AuxInt - wAuxInt := w.Args[0].AuxInt - if vAuxInt != wAuxInt { - return vAuxInt < wAuxInt + if tc := v.Type.Compare(w.Type); tc != CMPeq { + return tc + } + + if v.Aux != w.Aux { + if v.Aux == nil { + return CMPlt } + if w.Aux == nil { + return CMPgt + } + return lt2Cmp(auxIDs[v.Aux] < auxIDs[w.Aux]) + } - if len(v.Args) >= 2 { - vOp = v.Args[1].Op - wOp = w.Args[1].Op - if vOp != wOp { - return vOp < wOp + if depth > 0 { + for i := range v.Args { + if v.Args[i] == w.Args[i] { + // skip comparing equal args + continue } - - vAuxInt = v.Args[1].AuxInt - wAuxInt = w.Args[1].AuxInt - if vAuxInt != wAuxInt { - return vAuxInt < wAuxInt + if ac := cmpVal(v.Args[i], w.Args[i], auxIDs, depth-1); ac != CMPeq { + return ac } } } - // Sort by type, using the ssa.Type Compare method - if v.Type != w.Type { - c := v.Type.Compare(w.Type) - if c != CMPeq { - return c == CMPlt - } - } + return CMPeq +} - // Aux fields are interfaces with no comparison - // method. Use a map to number distinct ones, - // and use those numbers for comparison. - if v.Aux != w.Aux { - x := sv.auxIDs[v.Aux] - if x == 0 { - x = int32(len(sv.auxIDs)) + 1 - sv.auxIDs[v.Aux] = x - } - y := sv.auxIDs[w.Aux] - if y == 0 { - y = int32(len(sv.auxIDs)) + 1 - sv.auxIDs[w.Aux] = y - } - if x != y { - return x < y - } - } +// Sort values to make the initial partition. +type sortvalues struct { + a []*Value // array of values + auxIDs auxmap // aux -> aux ID map +} - // TODO(khr): is the above really ok to do? We're building - // the aux->auxID map online as sort is asking about it. If - // sort has some internal randomness, then the numbering might - // change from run to run. That will make the ordering of - // partitions random. It won't break the compiler but may - // make it nondeterministic. We could fix this by computing - // the aux->auxID map ahead of time, but the hope is here that - // we won't need to compute the mapping for many aux fields - // because the values they are in are otherwise unique. +func (sv sortvalues) Len() int { return len(sv.a) } +func (sv sortvalues) Swap(i, j int) { sv.a[i], sv.a[j] = sv.a[j], sv.a[i] } +func (sv sortvalues) Less(i, j int) bool { + v := sv.a[i] + w := sv.a[j] + if cmp := cmpVal(v, w, sv.auxIDs, cmpDepth); cmp != CMPeq { + return cmp == CMPlt + } // Sort by value ID last to keep the sort result deterministic. return v.ID < w.ID diff --git a/src/cmd/compile/internal/ssa/type.go b/src/cmd/compile/internal/ssa/type.go index afe04fa043..a23989c82e 100644 --- a/src/cmd/compile/internal/ssa/type.go +++ b/src/cmd/compile/internal/ssa/type.go @@ -95,6 +95,9 @@ func (t *CompilerType) Compare(u Type) Cmp { if !ok { return CMPlt } + if t == x { + return CMPeq + } // desire fast sorting, not pretty sorting. if len(t.Name) == len(x.Name) { if t.Name == x.Name { -- 2.48.1