]> Cypherpunks repositories - gostls13.git/commitdiff
[dev.ssa] cmd/compile: add a zero arg cse pass
authorTodd Neal <todd@tneal.org>
Thu, 11 Feb 2016 01:39:32 +0000 (19:39 -0600)
committerTodd Neal <todd@tneal.org>
Mon, 22 Feb 2016 13:06:18 +0000 (13:06 +0000)
Add an initial cse pass that only operates on zero argument
values.  This removes the need for a special case in cse for removing
OpSB and speeds up arithConst_ssa.go compilation by 9% while slowing
"test -c net/http" by 1.5%.

Change-Id: Id1500482485426f66c6c2eba75eeaf4f19c8a889
Reviewed-on: https://go-review.googlesource.com/19454
Run-TryBot: Todd Neal <todd@tneal.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/ssa/compile.go
src/cmd/compile/internal/ssa/cse.go
src/cmd/compile/internal/ssa/cse_test.go
src/cmd/compile/internal/ssa/zcse.go [new file with mode: 0644]

index 69f751187d4ee5ed906e26585417fbf035346599..dfead98c65825aa91d28a281ff21296a8e8635d6 100644 (file)
@@ -102,8 +102,9 @@ var passes = [...]pass{
        {"decompose user", decomposeUser, true},
        {"decompose builtin", decomposeBuiltIn, true},
        {"opt", opt, true},                // TODO: split required rules and optimizing rules
+       {"zero arg cse", zcse, true},      // required to merge OpSB values
        {"opt deadcode", deadcode, false}, // remove any blocks orphaned during opt
-       {"generic cse", cse, true},
+       {"generic cse", cse, false},
        {"nilcheckelim", nilcheckelim, false},
        {"generic deadcode", deadcode, false},
        {"fuse", fuse, false},
index 36ab6a3680fbc6e7f7bca049fd562d2b76ef1dc2..545e173928a4fc0626011d3b34e23ce40004629f 100644 (file)
@@ -13,34 +13,6 @@ import (
 // Values are just relinked, nothing is deleted.  A subsequent deadcode
 // pass is required to actually remove duplicate expressions.
 func cse(f *Func) {
-       if !f.Config.optimize {
-               // Don't do CSE in this case.  But we need to do
-               // just a little bit, to combine multiple OpSB ops.
-               // Regalloc gets very confused otherwise.
-               var sb *Value
-       outer:
-               for _, b := range f.Blocks {
-                       for _, v := range b.Values {
-                               if v.Op == OpSB {
-                                       sb = v
-                                       break outer
-                               }
-                       }
-               }
-               if sb == nil {
-                       return
-               }
-               for _, b := range f.Blocks {
-                       for _, v := range b.Values {
-                               for i, a := range v.Args {
-                                       if a.Op == OpSB {
-                                               v.Args[i] = sb
-                                       }
-                               }
-                       }
-               }
-               return
-       }
        // Two values are equivalent if they satisfy the following definition:
        // equivalent(v, w):
        //   v.op == w.op
@@ -77,6 +49,14 @@ func cse(f *Func) {
                }
        }
        for i, e := range partition {
+               if Debug > 1 && len(e) > 500 {
+                       fmt.Printf("CSE.large partition (%d): ", len(e))
+                       for j := 0; j < 3; j++ {
+                               fmt.Printf("%s ", e[j].LongString())
+                       }
+                       fmt.Println()
+               }
+
                for _, v := range e {
                        valueEqClass[v.ID] = ID(i)
                }
index fb9fada120a8357b6c487c99e484529759210771..905939fc32199585e068c8e9e9430f2edcdb034a 100644 (file)
@@ -6,12 +6,16 @@ package ssa
 
 import "testing"
 
+type tstAux struct {
+       s string
+}
+
 // This tests for a bug found when partitioning, but not sorting by the Aux value.
 func TestCSEAuxPartitionBug(t *testing.T) {
        c := testConfig(t)
-       arg1Aux := "arg1-aux"
-       arg2Aux := "arg2-aux"
-       arg3Aux := "arg3-aux"
+       arg1Aux := &tstAux{"arg1-aux"}
+       arg2Aux := &tstAux{"arg2-aux"}
+       arg3Aux := &tstAux{"arg3-aux"}
 
        // construct lots of values with args that have aux values and place
        // them in an order that triggers the bug
@@ -77,5 +81,43 @@ func TestCSEAuxPartitionBug(t *testing.T) {
        if s1Cnt != 0 || s2Cnt != 0 {
                t.Errorf("%d values missed during cse", s1Cnt+s2Cnt)
        }
+}
 
+// TestZCSE tests the zero arg cse.
+func TestZCSE(t *testing.T) {
+       c := testConfig(t)
+
+       fun := Fun(c, "entry",
+               Bloc("entry",
+                       Valu("start", OpInitMem, TypeMem, 0, nil),
+                       Valu("sp", OpSP, TypeBytePtr, 0, nil),
+                       Valu("sb1", OpSB, TypeBytePtr, 0, nil),
+                       Valu("sb2", OpSB, TypeBytePtr, 0, nil),
+                       Valu("addr1", OpAddr, TypeInt64Ptr, 0, nil, "sb1"),
+                       Valu("addr2", OpAddr, TypeInt64Ptr, 0, nil, "sb2"),
+                       Valu("a1ld", OpLoad, TypeInt64, 0, nil, "addr1", "start"),
+                       Valu("a2ld", OpLoad, TypeInt64, 0, nil, "addr2", "start"),
+                       Valu("c1", OpConst64, TypeInt64, 1, nil),
+                       Valu("r1", OpAdd64, TypeInt64, 0, nil, "a1ld", "c1"),
+                       Valu("c2", OpConst64, TypeInt64, 1, nil),
+                       Valu("r2", OpAdd64, TypeInt64, 0, nil, "a2ld", "c2"),
+                       Valu("r3", OpAdd64, TypeInt64, 0, nil, "r1", "r2"),
+                       Valu("raddr", OpAddr, TypeInt64Ptr, 0, nil, "sp"),
+                       Valu("raddrdef", OpVarDef, TypeMem, 0, nil, "start"),
+                       Valu("rstore", OpStore, TypeMem, 8, nil, "raddr", "r3", "raddrdef"),
+                       Goto("exit")),
+               Bloc("exit",
+                       Exit("rstore")))
+
+       CheckFunc(fun.f)
+       zcse(fun.f)
+       deadcode(fun.f)
+       CheckFunc(fun.f)
+
+       if fun.values["c1"].Op != OpInvalid && fun.values["c2"].Op != OpInvalid {
+               t.Errorf("zsce should have removed c1 or c2")
+       }
+       if fun.values["sb1"].Op != OpInvalid && fun.values["sb2"].Op != OpInvalid {
+               t.Errorf("zsce should have removed sb1 or sb2")
+       }
 }
diff --git a/src/cmd/compile/internal/ssa/zcse.go b/src/cmd/compile/internal/ssa/zcse.go
new file mode 100644 (file)
index 0000000..3206e19
--- /dev/null
@@ -0,0 +1,95 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// zcse does an initial pass of common-subexpression elimination on the
+// function for values with zero arguments to allow the more expensive cse
+// to begin with a reduced number of values. Values are just relinked,
+// nothing is deleted. A subsequent deadcode pass is required to actually
+// remove duplicate expressions.
+func zcse(f *Func) {
+       vals := make(map[vkey]*Value)
+
+       for _, b := range f.Blocks {
+               for i := 0; i < len(b.Values); {
+                       v := b.Values[i]
+                       next := true
+                       switch v.Op {
+                       case OpSB, OpConst64, OpConst32, OpConst16, OpConst8, OpConst64F,
+                               OpConst32F, OpConstBool, OpConstNil, OpConstSlice, OpConstInterface:
+                               key := vkey{v.Op, keyFor(v), typeStr(v)}
+                               if vals[key] == nil {
+                                       vals[key] = v
+                                       if b != f.Entry {
+                                               // Move v to the entry block so it will dominate every block
+                                               // where we might use it. This prevents the need for any dominator
+                                               // calculations in this pass.
+                                               v.Block = f.Entry
+                                               f.Entry.Values = append(f.Entry.Values, v)
+                                               last := len(b.Values) - 1
+                                               b.Values[i] = b.Values[last]
+                                               b.Values[last] = nil
+                                               b.Values = b.Values[:last]
+
+                                               // process b.Values[i] again
+                                               next = false
+                                       }
+                               }
+                       }
+                       if next {
+                               i++
+                       }
+               }
+       }
+
+       for _, b := range f.Blocks {
+               for _, v := range b.Values {
+                       for i, a := range v.Args {
+                               // TODO: encode arglen in the opcode table, then do this switch with a table lookup?
+                               switch a.Op {
+                               case OpSB, OpConst64, OpConst32, OpConst16, OpConst8, OpConst64F,
+                                       OpConst32F, OpConstBool, OpConstNil, OpConstSlice, OpConstInterface:
+                                       key := vkey{a.Op, keyFor(a), typeStr(a)}
+                                       if rv, ok := vals[key]; ok {
+                                               v.Args[i] = rv
+                                       }
+                               }
+                       }
+               }
+       }
+}
+
+// vkey is a type used to uniquely identify a zero arg value.
+type vkey struct {
+       op Op
+       a  int64  // aux
+       t  string // type
+}
+
+// typeStr returns a string version of the type of v.
+func typeStr(v *Value) string {
+       if v.Type == nil {
+               return ""
+       }
+       return v.Type.String()
+}
+
+// keyFor returns the AuxInt portion of a  key structure uniquely identifying a
+// zero arg value for the supported ops.
+func keyFor(v *Value) int64 {
+       switch v.Op {
+       case OpConst64, OpConst64F, OpConst32F:
+               return v.AuxInt
+       case OpConst32:
+               return int64(int32(v.AuxInt))
+       case OpConst16:
+               return int64(int16(v.AuxInt))
+       case OpConst8, OpConstBool:
+               return int64(int8(v.AuxInt))
+       default:
+               // Also matches OpSB, OpConstNil, OpConstSlice, OpConstInterface:
+               return 0
+       }
+}