{"decompose user", decomposeUser, true},
{"decompose builtin", decomposeBuiltIn, true},
{"opt", opt, true}, // TODO: split required rules and optimizing rules
+ {"zero arg cse", zcse, true}, // required to merge OpSB values
{"opt deadcode", deadcode, false}, // remove any blocks orphaned during opt
- {"generic cse", cse, true},
+ {"generic cse", cse, false},
{"nilcheckelim", nilcheckelim, false},
{"generic deadcode", deadcode, false},
{"fuse", fuse, false},
// Values are just relinked, nothing is deleted. A subsequent deadcode
// pass is required to actually remove duplicate expressions.
func cse(f *Func) {
- if !f.Config.optimize {
- // Don't do CSE in this case. But we need to do
- // just a little bit, to combine multiple OpSB ops.
- // Regalloc gets very confused otherwise.
- var sb *Value
- outer:
- for _, b := range f.Blocks {
- for _, v := range b.Values {
- if v.Op == OpSB {
- sb = v
- break outer
- }
- }
- }
- if sb == nil {
- return
- }
- for _, b := range f.Blocks {
- for _, v := range b.Values {
- for i, a := range v.Args {
- if a.Op == OpSB {
- v.Args[i] = sb
- }
- }
- }
- }
- return
- }
// Two values are equivalent if they satisfy the following definition:
// equivalent(v, w):
// v.op == w.op
}
}
for i, e := range partition {
+ if Debug > 1 && len(e) > 500 {
+ fmt.Printf("CSE.large partition (%d): ", len(e))
+ for j := 0; j < 3; j++ {
+ fmt.Printf("%s ", e[j].LongString())
+ }
+ fmt.Println()
+ }
+
for _, v := range e {
valueEqClass[v.ID] = ID(i)
}
import "testing"
+type tstAux struct {
+ s string
+}
+
// This tests for a bug found when partitioning, but not sorting by the Aux value.
func TestCSEAuxPartitionBug(t *testing.T) {
c := testConfig(t)
- arg1Aux := "arg1-aux"
- arg2Aux := "arg2-aux"
- arg3Aux := "arg3-aux"
+ arg1Aux := &tstAux{"arg1-aux"}
+ arg2Aux := &tstAux{"arg2-aux"}
+ arg3Aux := &tstAux{"arg3-aux"}
// construct lots of values with args that have aux values and place
// them in an order that triggers the bug
if s1Cnt != 0 || s2Cnt != 0 {
t.Errorf("%d values missed during cse", s1Cnt+s2Cnt)
}
+}
+// TestZCSE tests the zero arg cse.
+func TestZCSE(t *testing.T) {
+ c := testConfig(t)
+
+ fun := Fun(c, "entry",
+ Bloc("entry",
+ Valu("start", OpInitMem, TypeMem, 0, nil),
+ Valu("sp", OpSP, TypeBytePtr, 0, nil),
+ Valu("sb1", OpSB, TypeBytePtr, 0, nil),
+ Valu("sb2", OpSB, TypeBytePtr, 0, nil),
+ Valu("addr1", OpAddr, TypeInt64Ptr, 0, nil, "sb1"),
+ Valu("addr2", OpAddr, TypeInt64Ptr, 0, nil, "sb2"),
+ Valu("a1ld", OpLoad, TypeInt64, 0, nil, "addr1", "start"),
+ Valu("a2ld", OpLoad, TypeInt64, 0, nil, "addr2", "start"),
+ Valu("c1", OpConst64, TypeInt64, 1, nil),
+ Valu("r1", OpAdd64, TypeInt64, 0, nil, "a1ld", "c1"),
+ Valu("c2", OpConst64, TypeInt64, 1, nil),
+ Valu("r2", OpAdd64, TypeInt64, 0, nil, "a2ld", "c2"),
+ Valu("r3", OpAdd64, TypeInt64, 0, nil, "r1", "r2"),
+ Valu("raddr", OpAddr, TypeInt64Ptr, 0, nil, "sp"),
+ Valu("raddrdef", OpVarDef, TypeMem, 0, nil, "start"),
+ Valu("rstore", OpStore, TypeMem, 8, nil, "raddr", "r3", "raddrdef"),
+ Goto("exit")),
+ Bloc("exit",
+ Exit("rstore")))
+
+ CheckFunc(fun.f)
+ zcse(fun.f)
+ deadcode(fun.f)
+ CheckFunc(fun.f)
+
+ if fun.values["c1"].Op != OpInvalid && fun.values["c2"].Op != OpInvalid {
+ t.Errorf("zsce should have removed c1 or c2")
+ }
+ if fun.values["sb1"].Op != OpInvalid && fun.values["sb2"].Op != OpInvalid {
+ t.Errorf("zsce should have removed sb1 or sb2")
+ }
}
--- /dev/null
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// zcse does an initial pass of common-subexpression elimination on the
+// function for values with zero arguments to allow the more expensive cse
+// to begin with a reduced number of values. Values are just relinked,
+// nothing is deleted. A subsequent deadcode pass is required to actually
+// remove duplicate expressions.
+func zcse(f *Func) {
+ vals := make(map[vkey]*Value)
+
+ for _, b := range f.Blocks {
+ for i := 0; i < len(b.Values); {
+ v := b.Values[i]
+ next := true
+ switch v.Op {
+ case OpSB, OpConst64, OpConst32, OpConst16, OpConst8, OpConst64F,
+ OpConst32F, OpConstBool, OpConstNil, OpConstSlice, OpConstInterface:
+ key := vkey{v.Op, keyFor(v), typeStr(v)}
+ if vals[key] == nil {
+ vals[key] = v
+ if b != f.Entry {
+ // Move v to the entry block so it will dominate every block
+ // where we might use it. This prevents the need for any dominator
+ // calculations in this pass.
+ v.Block = f.Entry
+ f.Entry.Values = append(f.Entry.Values, v)
+ last := len(b.Values) - 1
+ b.Values[i] = b.Values[last]
+ b.Values[last] = nil
+ b.Values = b.Values[:last]
+
+ // process b.Values[i] again
+ next = false
+ }
+ }
+ }
+ if next {
+ i++
+ }
+ }
+ }
+
+ for _, b := range f.Blocks {
+ for _, v := range b.Values {
+ for i, a := range v.Args {
+ // TODO: encode arglen in the opcode table, then do this switch with a table lookup?
+ switch a.Op {
+ case OpSB, OpConst64, OpConst32, OpConst16, OpConst8, OpConst64F,
+ OpConst32F, OpConstBool, OpConstNil, OpConstSlice, OpConstInterface:
+ key := vkey{a.Op, keyFor(a), typeStr(a)}
+ if rv, ok := vals[key]; ok {
+ v.Args[i] = rv
+ }
+ }
+ }
+ }
+ }
+}
+
+// vkey is a type used to uniquely identify a zero arg value.
+type vkey struct {
+ op Op
+ a int64 // aux
+ t string // type
+}
+
+// typeStr returns a string version of the type of v.
+func typeStr(v *Value) string {
+ if v.Type == nil {
+ return ""
+ }
+ return v.Type.String()
+}
+
+// keyFor returns the AuxInt portion of a key structure uniquely identifying a
+// zero arg value for the supported ops.
+func keyFor(v *Value) int64 {
+ switch v.Op {
+ case OpConst64, OpConst64F, OpConst32F:
+ return v.AuxInt
+ case OpConst32:
+ return int64(int32(v.AuxInt))
+ case OpConst16:
+ return int64(int16(v.AuxInt))
+ case OpConst8, OpConstBool:
+ return int64(int8(v.AuxInt))
+ default:
+ // Also matches OpSB, OpConstNil, OpConstSlice, OpConstInterface:
+ return 0
+ }
+}