package ssa
-import "cmd/compile/internal/types"
+import (
+ "cmd/compile/internal/types"
+ "cmd/internal/src"
+ "fmt"
+ "sort"
+)
// expandCalls converts LE (Late Expansion) calls that act like they receive value args into a lower-level form
-// that is more oriented to a platform's ABI. The SelectN operations that extract results are also rewritten into
-// more appropriate forms.
+// that is more oriented to a platform's ABI. The SelectN operations that extract results are rewritten into
+// more appropriate forms, and any StructMake or ArrayMake inputs are decomposed until non-struct values are
+// reached (for now, Strings, Slices, Complex, and Interface are not decomposed because they are rewritten in
+// a subsequent phase, but that may need to change for a register ABI in case one of those composite values is
+// split between registers and memory).
+//
+// TODO: when it comes time to use registers, might want to include builtin selectors as well, but currently that happens in lower.
func expandCalls(f *Func) {
+ if !LateCallExpansionEnabledWithin(f) {
+ return
+ }
canSSAType := f.fe.CanSSA
+ regSize := f.Config.RegSize
sp, _ := f.spSb()
+
+ debug := f.pass.debug > 0
+
+ // For 32-bit targets, we need to deal with the decomposition of 64-bit integers.
+ tUint32 := types.Types[types.TUINT32]
+ tInt32 := types.Types[types.TINT32]
+ var hiOffset, lowOffset int64
+ if f.Config.BigEndian {
+ lowOffset = 4
+ } else {
+ hiOffset = 4
+ }
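+
+ // pairTypes returns the types of the high and low halves of a 64-bit integer of type et;
+ // callers pair these with hiOffset/lowOffset above. For example, on a little-endian target
+ // an int64 at offset 16 is treated as an int32 high word at offset 20 (hiOffset == 4) and
+ // a uint32 low word at offset 16.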
+ pairTypes := func(et types.EType) (tHi, tLo *types.Type) {
+ tHi = tUint32
+ if et == types.TINT64 {
+ tHi = tInt32
+ }
+ tLo = tUint32
+ return
+ }
+
+ // isAlreadyExpandedAggregateType returns whether a type is an SSA-able "aggregate" (multiple register) type
+ // that was expanded in an earlier phase (small user-defined arrays and structs, lowered in decomposeUser),
+ // or, on 32-bit targets, a 64-bit integer (not yet decomposed, but this phase must treat it as a pair).
+ // Other aggregate types are expanded in decomposeBuiltin, which comes later.
+ isAlreadyExpandedAggregateType := func(t *types.Type) bool {
+ if !canSSAType(t) {
+ return false
+ }
+ return t.IsStruct() || t.IsArray() || (regSize == 4 && t.Size() > 4 && t.IsInteger())
+ }
+
// Calls that need lowering have some number of inputs, including a memory input,
// and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able.
// With the current ABI, the outputs need to be converted to loads, which will all use the call's
// memory output as their input.
- // Step 1: find all references to calls as values and rewrite those.
- for _, b := range f.Blocks {
- for _, v := range b.Values {
- switch v.Op {
- case OpSelectN:
- call := v.Args[0]
- aux := call.Aux.(*AuxCall)
- which := v.AuxInt
- t := v.Type
- if which == aux.NResults() { // mem is after the results.
- // rewrite v as a Copy of call -- the replacement call will produce a mem.
- v.copyOf(call)
- } else {
- pt := types.NewPtr(t)
- if canSSAType(t) {
- off := f.ConstOffPtrSP(pt, aux.OffsetOfResult(which), sp)
- v.reset(OpLoad)
- v.SetArgs2(off, call)
+ // rewriteSelect recursively walks from a leaf selector to its root (an OpSelectN) through
+ // a chain of Struct/Array Select operations. If the chain of selectors does not end in
+ // an OpSelectN, it does nothing (this can happen depending on compiler phase ordering).
+ // It emits the code necessary to implement the leaf select operation that leads to the call.
+ // TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
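+ // For example, given a chain (a sketch; value numbers are illustrative)
+ // v1 = StaticLECall <{int,int},mem> {AuxCall ...} ... m0
+ // v2 = SelectN [0] v1
+ // v3 = StructSelect [1] v2
+ // rewriteSelect(v3, v3, 0) accumulates the field offset while walking from v3 to v2, and at
+ // the OpSelectN root rewrites v3 into a Load from SP+OffsetOfResult(0)+FieldOff(1), using
+ // the call (which will produce a mem once rewritten) as the Load's memory argument.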
+ var rewriteSelect func(leaf *Value, selector *Value, offset int64)
+ rewriteSelect = func(leaf *Value, selector *Value, offset int64) {
+ switch selector.Op {
+ case OpSelectN:
+ // TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there.
+ call := selector.Args[0]
+ aux := call.Aux.(*AuxCall)
+ which := selector.AuxInt
+ if which == aux.NResults() { // mem is after the results.
+ // Rewrite the leaf as a Copy of the call -- the replacement call will produce a mem.
+ leaf.copyOf(call)
+ } else {
+ leafType := leaf.Type
+ pt := types.NewPtr(leafType)
+ if canSSAType(leafType) {
+ off := f.ConstOffPtrSP(pt, offset+aux.OffsetOfResult(which), sp)
+ // Any selection right out of the arg area/registers has to be in the same Block as the call; use the call as the mem input.
+ if leaf.Block == call.Block {
+ leaf.reset(OpLoad)
+ leaf.SetArgs2(off, call)
} else {
- panic("Should not have non-SSA-able OpSelectN")
+ w := call.Block.NewValue2(leaf.Pos, OpLoad, leafType, off, call)
+ leaf.copyOf(w)
}
+ } else {
+ panic("Should not have non-SSA-able OpSelectN")
}
- v.Type = t // not right for the mem operand yet, but will be when call is rewritten.
+ }
+ case OpStructSelect:
+ w := selector.Args[0]
+ if w.Type.Etype != types.TSTRUCT {
+ fmt.Printf("Bad type for w:\nv=%v\nsel=%v\nw=%v\n,f=%s\n", leaf.LongString(), selector.LongString(), w.LongString(), f.Name)
+ }
+ rewriteSelect(leaf, w, offset+w.Type.FieldOff(int(selector.AuxInt)))
- case OpSelectNAddr:
- call := v.Args[0]
- which := v.AuxInt
- aux := call.Aux.(*AuxCall)
- pt := v.Type
- off := f.ConstOffPtrSP(pt, aux.OffsetOfResult(which), sp)
- v.copyOf(off)
+ case OpInt64Hi:
+ w := selector.Args[0]
+ rewriteSelect(leaf, w, offset+hiOffset)
+
+ case OpInt64Lo:
+ w := selector.Args[0]
+ rewriteSelect(leaf, w, offset+lowOffset)
+
+ case OpArraySelect:
+ w := selector.Args[0]
+ rewriteSelect(leaf, w, offset+selector.Type.Size()*selector.AuxInt)
+ default:
+ // Ignore dead ends; on 32-bit, these can occur when this phase runs before decomposeBuiltin.
+ }
+ }
+
+ // storeArg stores call argument a of type t at the given offset in the args area,
+ // decomposing SSA-able aggregates into a series of smaller stores to individual parameter slots.
+ // TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
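+ // For example (a sketch; offsets depend on the actual AuxCall), storing a struct{x, y int32}
+ // built by OpStructMake2 at argument offset 8 recurses into two scalar stores: x at SP+8,
+ // threaded into y at SP+12 on the memory chain.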
+ var storeArg func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value
+ storeArg = func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value {
+ switch a.Op {
+ case OpArrayMake0, OpStructMake0:
+ return mem
+ case OpStructMake1, OpStructMake2, OpStructMake3, OpStructMake4:
+ for i := 0; i < t.NumFields(); i++ {
+ fld := t.Field(i)
+ mem = storeArg(pos, b, a.Args[i], fld.Type, offset+fld.Offset, mem)
+ }
+ return mem
+ case OpArrayMake1:
+ return storeArg(pos, b, a.Args[0], t.Elem(), offset, mem)
+
+ case OpInt64Make:
+ tHi, tLo := pairTypes(t.Etype)
+ mem = storeArg(pos, b, a.Args[0], tHi, offset+hiOffset, mem)
+ return storeArg(pos, b, a.Args[1], tLo, offset+lowOffset, mem)
+ }
+ dst := f.ConstOffPtrSP(types.NewPtr(t), offset, sp)
+ x := b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, a, mem)
+ if debug {
+ fmt.Printf("storeArg(%v) returns %s\n", a, x.LongString())
+ }
+ return x
+ }
+
+ // offsetFrom returns a pointer at the given offset from dst, simplifying chained OpOffPtrs and offsets from SP.
+ // TODO should also optimize offsets from SB?
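+ // For example, asking for offset 8 from a dst that is already OffPtr [16] of SP folds to a
+ // single ConstOffPtrSP at offset 24, rather than stacking a second OpOffPtr on the first.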
+ offsetFrom := func(dst *Value, offset int64, t *types.Type) *Value {
+ pt := types.NewPtr(t)
+ if offset == 0 && dst.Type == pt { // this is not actually likely
+ return dst
+ }
+ if dst.Op != OpOffPtr {
+ return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, dst)
+ }
+ // Simplify OpOffPtr
+ from := dst.Args[0]
+ offset += dst.AuxInt
+ if from == sp {
+ return f.ConstOffPtrSP(pt, offset, sp)
+ }
+ return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, from)
+ }
+
+ // splitStore converts a store of an SSA-able aggregate into a series of smaller stores, emitting
+ // appropriate Struct/Array Select operations (which will soon go dead) to obtain the parts.
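+ // For example, a store of a [2]int32 value that came from a call becomes two int32 stores,
+ // each fed by an OpArraySelect [i] of the original source; Steps 2 and 3 below then rewrite
+ // those selectors into loads from the call's result area.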
+ var splitStore func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value
+ splitStore = func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value {
+ // TODO might be worth commoning up duplicate selectors, but since they go dead, maybe no point.
+ pos := v.Pos.WithNotStmt()
+ switch t.Etype {
+ case types.TINT64, types.TUINT64:
+ if t.Width == regSize {
+ break
}
+ tHi, tLo := pairTypes(t.Etype)
+ sel := src.Block.NewValue1(pos, OpInt64Hi, tHi, src)
+ mem = splitStore(dst, sel, mem, v, tHi, offset+hiOffset, firstStorePos)
+ firstStorePos = firstStorePos.WithNotStmt()
+ sel = src.Block.NewValue1(pos, OpInt64Lo, tLo, src)
+ return splitStore(dst, sel, mem, v, tLo, offset+lowOffset, firstStorePos)
+
+ case types.TARRAY:
+ elt := t.Elem()
+ for i := int64(0); i < t.NumElem(); i++ {
+ sel := src.Block.NewValue1I(pos, OpArraySelect, elt, i, src)
+ mem = splitStore(dst, sel, mem, v, elt, offset+i*elt.Width, firstStorePos)
+ firstStorePos = firstStorePos.WithNotStmt()
+ }
+ return mem
+ case types.TSTRUCT:
+ if src.Op == OpIData && t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize {
+ // This peculiar test deals with accesses to immediate interface data.
+ // It works okay because everything is the same size.
+ // Example code that triggers this can be found in go/constant/value.go, function ToComplex
+ // v119 (+881) = IData <intVal> v6
+ // v121 (+882) = StaticLECall <floatVal,mem> {AuxCall{"".itof([intVal,0])[floatVal,8]}} [16] v119 v1
+ // This corresponds to the generic rewrite rule "(StructSelect [0] (IData x)) => (IData x)"
+ // Guard against "struct{struct{*foo}}"
+ for t.Etype == types.TSTRUCT && t.NumFields() == 1 {
+ t = t.Field(0).Type
+ }
+ if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY {
+ f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct in it")
+ }
+ break // handle the leaf type.
+ }
+ for i := 0; i < t.NumFields(); i++ {
+ fld := t.Field(i)
+ sel := src.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), src)
+ mem = splitStore(dst, sel, mem, v, fld.Type, offset+fld.Offset, firstStorePos)
+ firstStorePos = firstStorePos.WithNotStmt()
+ }
+ return mem
}
+ // Default, including for aggregates whose single element exactly fills their container
+ // TODO this will be a problem for cast interfaces containing floats when we move to registers.
+ x := v.Block.NewValue3A(firstStorePos, OpStore, types.TypeMem, t, offsetFrom(dst, offset, t), src, mem)
+ if debug {
+ fmt.Printf("splitStore(%v, %v, %v, %v) returns %s\n", dst, src, mem, v, x.LongString())
+ }
+ return x
}
- // Step 2: rewrite the calls
+ // Step 0: rewrite the calls, converting each value arg into a store to the appropriate argument slot.
for _, b := range f.Blocks {
for _, v := range b.Values {
switch v.Op {
case OpStaticLECall:
// Thread the stores on the memory arg
- m0 := v.Args[len(v.Args)-1]
+ m0 := v.MemoryArg()
mem := m0
pos := v.Pos.WithNotStmt()
aux := v.Aux.(*AuxCall)
- auxInt := v.AuxInt
for i, a := range v.Args {
- if a == m0 {
+ if a == m0 { // mem is last.
break
}
if a.Op == OpDereference {
// "Dereference" of addressed (probably not-SSA-eligible) value becomes Move
+ // TODO this will be more complicated with registers in the picture.
+ if a.MemoryArg() != m0 {
+ f.Fatalf("Op...LECall and OpDereference have mismatched mem, %s and %s", v.LongString(), a.LongString())
+ }
src := a.Args[0]
dst := f.ConstOffPtrSP(src.Type, aux.OffsetOfArg(int64(i)), sp)
- a.reset(OpMove)
- a.Pos = pos
- a.Type = types.TypeMem
- a.Aux = aux.TypeOfArg(int64(i))
- a.AuxInt = aux.SizeOfArg(int64(i))
- a.SetArgs3(dst, src, mem)
- mem = a
+ if a.Uses == 1 {
+ a.reset(OpMove)
+ a.Pos = pos
+ a.Type = types.TypeMem
+ a.Aux = aux.TypeOfArg(int64(i))
+ a.AuxInt = aux.SizeOfArg(int64(i))
+ a.SetArgs3(dst, src, mem)
+ mem = a
+ } else {
+ mem = a.Block.NewValue3A(pos, OpMove, types.TypeMem, aux.TypeOfArg(int64(i)), dst, src, mem)
+ mem.AuxInt = aux.SizeOfArg(int64(i))
+ }
} else {
- // Add a new store.
- t := aux.TypeOfArg(int64(i))
- dst := f.ConstOffPtrSP(types.NewPtr(t), aux.OffsetOfArg(int64(i)), sp)
- mem = b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, a, mem)
+ mem = storeArg(pos, b, a, aux.TypeOfArg(int64(i)), aux.OffsetOfArg(int64(i)), mem)
}
}
- v.reset(OpStaticCall)
- v.Type = types.TypeMem
- v.Aux = aux
- v.AuxInt = auxInt
+ v.resetArgs()
v.SetArgs1(mem)
}
}
}
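+
+ // After Step 0, a call such as
+ // v4 = StaticLECall <int,mem> {AuxCall ...} v2 v3 m0
+ // retains only its rethreaded memory argument; v2 and v3 have become stores (or Moves) into
+ // the outgoing args area on the memory chain feeding the call. (Values are illustrative.)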
+
+ // Step 1: any stores of aggregates that remain are believed to be sourced from call results.
+ // Decompose those stores into a series of smaller stores, adding selection ops as necessary.
+ for _, b := range f.Blocks {
+ for _, v := range b.Values {
+ if v.Op == OpStore {
+ t := v.Aux.(*types.Type)
+ if isAlreadyExpandedAggregateType(t) {
+ dst, src, mem := v.Args[0], v.Args[1], v.Args[2]
+ mem = splitStore(dst, src, mem, v, t, 0, v.Pos)
+ v.copyOf(mem)
+ }
+ }
+ }
+ }
+
+ val2Preds := make(map[*Value]int32) // Used to accumulate dependency graph of selection operations for topological ordering.
+
+ // Step 2: accumulate selection operations for rewrite in topological order.
+ // Any select-for-addressing applied to call results can be transformed directly.
+ // TODO this is overkill; with the transformation of aggregate references into series of leaf references, it is only necessary to remember and recurse on the leaves.
+ for _, b := range f.Blocks {
+ for _, v := range b.Values {
+ // Accumulate chains of selectors for processing in topological order
+ switch v.Op {
+ case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo:
+ w := v.Args[0]
+ switch w.Op {
+ case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo, OpSelectN:
+ val2Preds[w] += 1
+ if debug {
+ fmt.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w])
+ }
+ }
+ fallthrough
+ case OpSelectN:
+ if _, ok := val2Preds[v]; !ok {
+ val2Preds[v] = 0
+ if debug {
+ fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
+ }
+ }
+ case OpSelectNAddr:
+ // Do these directly; there are no chains of selectors.
+ call := v.Args[0]
+ which := v.AuxInt
+ aux := call.Aux.(*AuxCall)
+ pt := v.Type
+ off := f.ConstOffPtrSP(pt, aux.OffsetOfResult(which), sp)
+ v.copyOf(off)
+ }
+ }
+ }
+
+ // Compilation must be deterministic, so process the ready selectors in increasing ID order.
+ var ordered []*Value
+ less := func(i, j int) bool { return ordered[i].ID < ordered[j].ID }
+
+ // Step 3: Rewrite in topological order. All chains of selectors end up in the same block as the call.
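+ // For example, with v3 = StructSelect (v2 = SelectN (call)), val2Preds starts as {v3: 0, v2: 1};
+ // rewriting v3 brings v2's count down to ready, so the inner loop below continues directly
+ // with v2 rather than waiting for another scan of the map.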
+ for len(val2Preds) > 0 {
+ ordered = ordered[:0]
+ for v, n := range val2Preds {
+ if n == 0 {
+ ordered = append(ordered, v)
+ }
+ }
+ sort.Slice(ordered, less)
+ for _, v := range ordered {
+ for {
+ w := v.Args[0]
+ if debug {
+ fmt.Printf("About to rewrite %s, args[0]=%s\n", v.LongString(), w.LongString())
+ }
+ delete(val2Preds, v)
+ rewriteSelect(v, v, 0)
+ v = w
+ n, ok := val2Preds[v]
+ if !ok {
+ break
+ }
+ if n != 1 {
+ val2Preds[v] = n - 1
+ break
+ }
+ // Loop on the new v: its count is 1, so it is ready now, and the next iteration deletes its entry, so there is no need to store a zero.
+ }
+ }
+ }
+
+ // Step 4: rewrite the calls themselves, correcting the type (each call now produces only memory).
+ for _, b := range f.Blocks {
+ for _, v := range b.Values {
+ switch v.Op {
+ case OpStaticLECall:
+ v.Op = OpStaticCall
+ v.Type = types.TypeMem
+ }
+ }
+ }
+
+ // Step 5: elide any copies introduced by the rewrites above.
+ for _, b := range f.Blocks {
+ for _, v := range b.Values {
+ for i, a := range v.Args {
+ if a.Op != OpCopy {
+ continue
+ }
+ aa := copySource(a)
+ v.SetArg(i, aa)
+ // If that was the last use of a, reclaim the whole now-dead chain of copies.
+ for a.Uses == 0 {
+ next := a.Args[0]
+ a.reset(OpInvalid)
+ a = next
+ }
+ }
+ }
+ }
}