cmd/compile: delay expansion of OpArg until expand_calls
author     David Chase <drchase@google.com>
           Tue, 13 Oct 2020 23:24:04 +0000 (19:24 -0400)
committer  David Chase <drchase@google.com>
           Thu, 29 Oct 2020 03:23:51 +0000 (03:23 +0000)
As it says, delay expansion of OpArg to the expand_calls phase,
to enable (eventually) interprocedural SSA optimizations, and
(sooner) change to a register ABI.

Includes a round of cleanup to function names and comments,
largely to match the expanded scope of the functions.

This CL removes the per-function dependence on GOSSAHASH,
but the go116lateCallExpansion kill switch remains (and was
tested locally to ensure it worked).

Two functions in expand_calls.go that did overlapping work were
combined into a single function that is called twice.

Fixes #42236.
For #40724.

Change-Id: Icbb78947eaa39f17f2c1210d5c2caef20abd6571
Reviewed-on: https://go-review.googlesource.com/c/go/+/262117
Trust: David Chase <drchase@google.com>
Run-TryBot: David Chase <drchase@google.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
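
To make the idea concrete, here is a minimal, self-contained sketch (hypothetical names; not the compiler's ssa API): rather than the early decompose passes splitting an aggregate parameter into per-field OpArg values, the argument stays whole and the late expand_calls phase derives each primitive piece's type and offset by adding the field offset to the argument's own frame offset.

package main

import "fmt"

// piece models one primitive part of an aggregate parameter (illustrative only).
type piece struct {
	name   string
	size   int64 // bytes
	offset int64 // offset within the aggregate (or, for output, within the frame)
}

// expandArg mimics the late expansion of an aggregate OpArg: each field becomes
// its own primitive "Arg" whose offset is the argument's frame offset plus the
// field offset, computed in expand_calls instead of an early decompose pass.
func expandArg(argName string, frameOffset int64, fields []piece) []piece {
	out := make([]piece, 0, len(fields))
	for _, f := range fields {
		out = append(out, piece{
			name:   argName + "." + f.name,
			size:   f.size,
			offset: frameOffset + f.offset,
		})
	}
	return out
}

func main() {
	// A string-shaped parameter {ptr, len} passed at frame offset 16.
	for _, p := range expandArg("s", 16, []piece{{"ptr", 8, 0}, {"len", 8, 8}}) {
		fmt.Printf("Arg %-5s size=%d offset=%d\n", p.name, p.size, p.offset)
	}
}
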
src/cmd/compile/fmtmap_test.go
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ssa/compile.go
src/cmd/compile/internal/ssa/config.go
src/cmd/compile/internal/ssa/expand_calls.go
src/cmd/compile/internal/ssa/gen/dec64.rules
src/cmd/compile/internal/ssa/rewritedec64.go
src/cmd/compile/internal/ssa/stackalloc.go

diff --git a/src/cmd/compile/fmtmap_test.go b/src/cmd/compile/fmtmap_test.go
index f8c33ec1f9b21ff7453f113729bb09ab02e32e8a..179c60187fe9d6328fe250fa31c6478d0b9c8446 100644 (file)
@@ -136,7 +136,6 @@ var knownFormats = map[string]string{
        "cmd/compile/internal/types.EType %s":             "",
        "cmd/compile/internal/types.EType %v":             "",
        "cmd/internal/obj.ABI %v":                         "",
-       "cmd/internal/src.XPos %v":                        "",
        "error %v":                                        "",
        "float64 %.2f":                                    "",
        "float64 %.3f":                                    "",
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index fb9d3e811a3b92699376f5212de8cd1c3cb375f2..45d628cc5e8ed0355594c7074963ccbbff8f93a8 100644 (file)
@@ -409,11 +409,17 @@ func buildssa(fn *Node, worker int) *ssa.Func {
 
        // Generate addresses of local declarations
        s.decladdrs = map[*Node]*ssa.Value{}
+       var args []ssa.Param
+       var results []ssa.Param
        for _, n := range fn.Func.Dcl {
                switch n.Class() {
-               case PPARAM, PPARAMOUT:
+               case PPARAM:
+                       s.decladdrs[n] = s.entryNewValue2A(ssa.OpLocalAddr, types.NewPtr(n.Type), n, s.sp, s.startmem)
+                       args = append(args, ssa.Param{Type: n.Type, Offset: int32(n.Xoffset)})
+               case PPARAMOUT:
                        s.decladdrs[n] = s.entryNewValue2A(ssa.OpLocalAddr, types.NewPtr(n.Type), n, s.sp, s.startmem)
-                       if n.Class() == PPARAMOUT && s.canSSA(n) {
+                       results = append(results, ssa.Param{Type: n.Type, Offset: int32(n.Xoffset)})
+                       if s.canSSA(n) {
                                // Save ssa-able PPARAMOUT variables so we can
                                // store them back to the stack at the end of
                                // the function.
@@ -4909,7 +4915,7 @@ func (s *state) canSSA(n *Node) bool {
        if n.Class() == PPARAM && n.Sym != nil && n.Sym.Name == ".this" {
                // wrappers generated by genwrapper need to update
                // the .this pointer in place.
-               // TODO: treat as a PPARMOUT?
+               // TODO: treat as a PPARAMOUT?
                return false
        }
        return canSSAType(n.Type)
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go
index bddd271273a9a306cd9805452a09ddbd775b79ca..9ddc53060ce3e3ed0348e99b7abeb34340ed0882 100644 (file)
@@ -429,7 +429,7 @@ var passes = [...]pass{
        {name: "early copyelim", fn: copyelim},
        {name: "early deadcode", fn: deadcode}, // remove generated dead code to avoid doing pointless work during opt
        {name: "short circuit", fn: shortcircuit},
-       {name: "decompose args", fn: decomposeArgs, required: true},
+       {name: "decompose args", fn: decomposeArgs, required: !go116lateCallExpansion, disabled: go116lateCallExpansion}, // handled by late call lowering
        {name: "decompose user", fn: decomposeUser, required: true},
        {name: "pre-opt deadcode", fn: deadcode},
        {name: "opt", fn: opt, required: true},               // NB: some generic rules know the name of the opt pass. TODO: split required rules and optimizing rules
diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go
index cb6f6fe7a14a208c9db5c88786f3768fd5c6fc08..0fe0337ddfa309c4d6bab1538fd31faf5a5bfaef 100644 (file)
@@ -199,9 +199,9 @@ const (
 const go116lateCallExpansion = true
 
 // LateCallExpansionEnabledWithin returns true if late call expansion should be tested
-// within compilation of a function/method triggered by GOSSAHASH (defaults to "yes").
+// within compilation of a function/method.
 func LateCallExpansionEnabledWithin(f *Func) bool {
-       return go116lateCallExpansion && f.DebugTest // Currently set up for GOSSAHASH bug searches
+       return go116lateCallExpansion
 }
 
 // NewConfig returns a new configuration object for the given architecture.
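
Sketch of the gating pattern that remains after this change (hypothetical names; not the real pass driver): a single package-level constant both disables the early decompose-args pass and answers the per-function query, with no per-function GOSSAHASH/DebugTest dependence left.

package sketch

// enableLateExpansion is the lone kill switch; flip it to restore the old lowering.
const enableLateExpansion = true

// pass is a stand-in for the compiler's pass-table entry.
type pass struct {
	name     string
	required bool // must run for correctness
	disabled bool // skipped entirely by the driver
}

var passes = []pass{
	// With late expansion on, early argument decomposition is neither required nor run.
	{name: "decompose args", required: !enableLateExpansion, disabled: enableLateExpansion},
}

// lateCallExpansionEnabledWithin no longer consults a per-function debug hash.
func lateCallExpansionEnabledWithin() bool { return enableLateExpansion }
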
diff --git a/src/cmd/compile/internal/ssa/expand_calls.go b/src/cmd/compile/internal/ssa/expand_calls.go
index 3e3573ff394ce95d60de24471e0a10efdd30c2a4..fbde19d94c29927a2a50a0d396ced550fed31dcb 100644 (file)
@@ -15,7 +15,7 @@ type selKey struct {
        from   *Value
        offset int64
        size   int64
-       typ    types.EType
+       typ    *types.Type
 }
 
 type offsetKey struct {
@@ -27,7 +27,8 @@ type offsetKey struct {
 // expandCalls converts LE (Late Expansion) calls that act like they receive value args into a lower-level form
 // that is more oriented to a platform's ABI.  The SelectN operations that extract results are rewritten into
 // more appropriate forms, and any StructMake or ArrayMake inputs are decomposed until non-struct values are
-// reached.
+// reached.  On the callee side, OpArg nodes are not decomposed until this phase is run.
+// TODO results should not be lowered until this phase.
 func expandCalls(f *Func) {
        // Calls that need lowering have some number of inputs, including a memory input,
        // and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able.
@@ -42,6 +43,10 @@ func expandCalls(f *Func) {
        }
        debug := f.pass.debug > 0
 
+       if debug {
+               fmt.Printf("\nexpandsCalls(%s)\n", f.Name)
+       }
+
        canSSAType := f.fe.CanSSA
        regSize := f.Config.RegSize
        sp, _ := f.spSb()
@@ -58,6 +63,10 @@ func expandCalls(f *Func) {
 
        namedSelects := make(map[*Value][]namedVal)
 
+       sdom := f.Sdom()
+
+       common := make(map[selKey]*Value)
+
        // intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target
        // that has no 64-bit integer registers.
        intPairTypes := func(et types.EType) (tHi, tLo *types.Type) {
@@ -107,6 +116,7 @@ func expandCalls(f *Func) {
                return v
        }
 
+       // splitSlots splits one "field" (specified by sfx, offset, and ty) out of the LocalSlots in ls and returns the new LocalSlots this generates.
        splitSlots := func(ls []LocalSlot, sfx string, offset int64, ty *types.Type) []LocalSlot {
                var locs []LocalSlot
                for i := range ls {
@@ -147,21 +157,103 @@ func expandCalls(f *Func) {
        // With the current ABI, the outputs need to be converted to loads, which will all use the call's
        // memory output as their input.
 
-       // rewriteSelect recursively walks leaf selector to a root (OpSelectN) through
-       // a chain of Struct/Array Select operations.  If the chain of selectors does not
-       // end in OpSelectN, it does nothing (this can happen depending on compiler phase ordering).
-       // It emits the code necessary to implement the leaf select operation that leads to the call.
+       // rewriteSelect recursively walks from leaf selector to a root (OpSelectN, OpLoad, OpArg)
+       // through a chain of Struct/Array/builtin Select operations.  If the chain of selectors does not
+       // end in an expected root, it does nothing (this can happen depending on compiler phase ordering).
+       // The "leaf" provides the type, the root supplies the container, and the leaf-to-root path
+       // accumulates the offset.
+       // It emits the code necessary to implement the leaf select operation that leads to the root.
+       //
        // TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
        var rewriteSelect func(leaf *Value, selector *Value, offset int64) []LocalSlot
        rewriteSelect = func(leaf *Value, selector *Value, offset int64) []LocalSlot {
+               if debug {
+                       fmt.Printf("rewriteSelect(%s, %s, %d)\n", leaf.LongString(), selector.LongString(), offset)
+               }
                var locs []LocalSlot
                leafType := leaf.Type
+               if len(selector.Args) > 0 {
+                       w := selector.Args[0]
+                       if w.Op == OpCopy {
+                               for w.Op == OpCopy {
+                                       w = w.Args[0]
+                               }
+                               selector.SetArg(0, w)
+                       }
+               }
                switch selector.Op {
-               case OpSelectN:
-                       // TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there.
+               case OpArg:
+                       if !isAlreadyExpandedAggregateType(selector.Type) {
+                               if leafType == selector.Type { // OpIData leads us here, sometimes.
+                                       leaf.copyOf(selector)
+                               } else {
+                                       f.Fatalf("Unexpected OpArg type, selector=%s, leaf=%s\n", selector.LongString(), leaf.LongString())
+                               }
+                               if debug {
+                                       fmt.Printf("\tOpArg, break\n")
+                               }
+                               break
+                       }
+                       if leaf.Op == OpIData {
+                               leafType = removeTrivialWrapperTypes(leaf.Type)
+                       }
+                       aux := selector.Aux
+                       auxInt := selector.AuxInt + offset
+                       if leaf.Block == selector.Block {
+                               leaf.reset(OpArg)
+                               leaf.Aux = aux
+                               leaf.AuxInt = auxInt
+                               leaf.Type = leafType
+                       } else {
+                               w := selector.Block.NewValue0IA(leaf.Pos, OpArg, leafType, auxInt, aux)
+                               leaf.copyOf(w)
+                               if debug {
+                                       fmt.Printf("\tnew %s\n", w.LongString())
+                               }
+                       }
                        for _, s := range namedSelects[selector] {
                                locs = append(locs, f.Names[s.locIndex])
                        }
+
+               case OpLoad: // We end up here because of IData of immediate structures.
+                       // Failure case:
+                       // (note the failure case is very rare; w/o this case, make.bash and run.bash both pass, as well as
+                       // the hard cases of building {syscall,math,math/cmplx,math/bits,go/constant} on ppc64le and mips-softfloat).
+                       //
+                       // GOSSAFUNC='(*dumper).dump' go build -gcflags=-l -tags=math_big_pure_go cmd/compile/internal/gc
+                       // cmd/compile/internal/gc/dump.go:136:14: internal compiler error: '(*dumper).dump': not lowered: v827, StructSelect PTR PTR
+                       // b2: ← b1
+                       // v20 (+142) = StaticLECall <interface {},mem> {AuxCall{reflect.Value.Interface([reflect.Value,0])[interface {},24]}} [40] v8 v1
+                       // v21 (142) = SelectN <mem> [1] v20
+                       // v22 (142) = SelectN <interface {}> [0] v20
+                       // b15: ← b8
+                       // v71 (+143) = IData <Nodes> v22 (v[Nodes])
+                       // v73 (+146) = StaticLECall <[]*Node,mem> {AuxCall{"".Nodes.Slice([Nodes,0])[[]*Node,8]}} [32] v71 v21
+                       //
+                       // translates (w/o the "case OpLoad:" above) to:
+                       //
+                       // b2: ← b1
+                       // v20 (+142) = StaticCall <mem> {AuxCall{reflect.Value.Interface([reflect.Value,0])[interface {},24]}} [40] v715
+                       // v23 (142) = Load <*uintptr> v19 v20
+                       // v823 (142) = IsNonNil <bool> v23
+                       // v67 (+143) = Load <*[]*Node> v880 v20
+                       // b15: ← b8
+                       // v827 (146) = StructSelect <*[]*Node> [0] v67
+                       // v846 (146) = Store <mem> {*[]*Node} v769 v827 v20
+                       // v73 (+146) = StaticCall <mem> {AuxCall{"".Nodes.Slice([Nodes,0])[[]*Node,8]}} [32] v846
+                       // i.e., the struct select is generated and remains in because it is not applied to an actual structure.
+                       // The OpLoad was created to load the single field of the IData
+                       // This case removes that StructSelect.
+                       if leafType != selector.Type {
+                               f.Fatalf("Unexpected Load as selector, leaf=%s, selector=%s\n", leaf.LongString(), selector.LongString())
+                       }
+                       leaf.copyOf(selector)
+                       for _, s := range namedSelects[selector] {
+                               locs = append(locs, f.Names[s.locIndex])
+                       }
+
+               case OpSelectN:
+                       // TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there.
                        call := selector.Args[0]
                        aux := call.Aux.(*AuxCall)
                        which := selector.AuxInt
@@ -171,10 +263,6 @@ func expandCalls(f *Func) {
                        } else {
                                leafType := removeTrivialWrapperTypes(leaf.Type)
                                if canSSAType(leafType) {
-                                       for leafType.Etype == types.TSTRUCT && leafType.NumFields() == 1 {
-                                               // This may not be adequately general -- consider [1]etc but this is caused by immediate IDATA
-                                               leafType = leafType.Field(0).Type
-                                       }
                                        pt := types.NewPtr(leafType)
                                        off := offsetFrom(sp, offset+aux.OffsetOfResult(which), pt)
                                        // Any selection right out of the arg area/registers has to be same Block as call, use call as mem input.
@@ -185,22 +273,29 @@ func expandCalls(f *Func) {
                                        } else {
                                                w := call.Block.NewValue2(leaf.Pos, OpLoad, leafType, off, call)
                                                leaf.copyOf(w)
+                                               if debug {
+                                                       fmt.Printf("\tnew %s\n", w.LongString())
+                                               }
+                                       }
+                                       for _, s := range namedSelects[selector] {
+                                               locs = append(locs, f.Names[s.locIndex])
                                        }
                                } else {
                                        f.Fatalf("Should not have non-SSA-able OpSelectN, selector=%s", selector.LongString())
                                }
                        }
+
                case OpStructSelect:
                        w := selector.Args[0]
                        var ls []LocalSlot
-                       if w.Type.Etype != types.TSTRUCT {
-                               f.Fatalf("Bad type for w: v=%v; sel=%v; w=%v; ,f=%s\n", leaf.LongString(), selector.LongString(), w.LongString(), f.Name)
-                               // Artifact of immediate interface idata
+                       if w.Type.Etype != types.TSTRUCT { // IData artifact
                                ls = rewriteSelect(leaf, w, offset)
                        } else {
                                ls = rewriteSelect(leaf, w, offset+w.Type.FieldOff(int(selector.AuxInt)))
-                               for _, l := range ls {
-                                       locs = append(locs, f.fe.SplitStruct(l, int(selector.AuxInt)))
+                               if w.Op != OpIData {
+                                       for _, l := range ls {
+                                               locs = append(locs, f.fe.SplitStruct(l, int(selector.AuxInt)))
+                                       }
                                }
                        }
 
@@ -221,9 +316,7 @@ func expandCalls(f *Func) {
                case OpStringPtr:
                        ls := rewriteSelect(leaf, selector.Args[0], offset)
                        locs = splitSlots(ls, ".ptr", 0, typ.BytePtr)
-                       //for i := range ls {
-                       //      locs = append(locs, f.fe.SplitSlot(&ls[i], ".ptr", 0, typ.BytePtr))
-                       //}
+
                case OpSlicePtr:
                        w := selector.Args[0]
                        ls := rewriteSelect(leaf, w, offset)
@@ -272,32 +365,130 @@ func expandCalls(f *Func) {
                return locs
        }
 
-       // storeArg converts stores of SSA-able aggregate arguments (passed to a call) into a series of stores of
-       // smaller types into individual parameter slots.
-       var storeArg func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value
-       storeArg = func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value {
+       // storeArgOrLoad converts stores of SSA-able aggregate arguments (passed to a call) into a series of primitive-typed
+       // stores of non-aggregate types.  It recursively walks up a chain of selectors until it reaches a Load or an Arg.
+       // If it does not reach a Load or an Arg, nothing happens; this allows a little freedom in phase ordering.
+       var storeArgOrLoad func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offset int64) *Value
+
+       // decomposeArgOrLoad is a helper for storeArgOrLoad.
+       // It decomposes a Load or an Arg into smaller parts, parameterized by the decomposeOne and decomposeTwo functions
+       // passed to it, and returns the new mem. If the type does not match one of the expected aggregate types, it returns nil instead.
+       decomposeArgOrLoad := func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offset int64,
+               decomposeOne func(pos src.XPos, b *Block, base, source, mem *Value, t1 *types.Type, offArg, offStore int64) *Value,
+               decomposeTwo func(pos src.XPos, b *Block, base, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64) *Value) *Value {
+               u := source.Type
+               switch u.Etype {
+               case types.TARRAY:
+                       elem := u.Elem()
+                       for i := int64(0); i < u.NumElem(); i++ {
+                               elemOff := i * elem.Size()
+                               mem = decomposeOne(pos, b, base, source, mem, elem, source.AuxInt+elemOff, offset+elemOff)
+                               pos = pos.WithNotStmt()
+                       }
+                       return mem
+               case types.TSTRUCT:
+                       for i := 0; i < u.NumFields(); i++ {
+                               fld := u.Field(i)
+                               mem = decomposeOne(pos, b, base, source, mem, fld.Type, source.AuxInt+fld.Offset, offset+fld.Offset)
+                               pos = pos.WithNotStmt()
+                       }
+                       return mem
+               case types.TINT64, types.TUINT64:
+                       if t.Width == regSize {
+                               break
+                       }
+                       tHi, tLo := intPairTypes(t.Etype)
+                       mem = decomposeOne(pos, b, base, source, mem, tHi, source.AuxInt+hiOffset, offset+hiOffset)
+                       pos = pos.WithNotStmt()
+                       return decomposeOne(pos, b, base, source, mem, tLo, source.AuxInt+lowOffset, offset+lowOffset)
+               case types.TINTER:
+                       return decomposeTwo(pos, b, base, source, mem, typ.Uintptr, typ.BytePtr, source.AuxInt, offset)
+               case types.TSTRING:
+                       return decomposeTwo(pos, b, base, source, mem, typ.BytePtr, typ.Int, source.AuxInt, offset)
+               case types.TCOMPLEX64:
+                       return decomposeTwo(pos, b, base, source, mem, typ.Float32, typ.Float32, source.AuxInt, offset)
+               case types.TCOMPLEX128:
+                       return decomposeTwo(pos, b, base, source, mem, typ.Float64, typ.Float64, source.AuxInt, offset)
+               case types.TSLICE:
+                       mem = decomposeTwo(pos, b, base, source, mem, typ.BytePtr, typ.Int, source.AuxInt, offset)
+                       return decomposeOne(pos, b, base, source, mem, typ.Int, source.AuxInt+2*ptrSize, offset+2*ptrSize)
+               }
+               return nil
+       }
+
+       // storeOneArg creates a decomposed (one step) arg that is then stored.
+       // pos and b locate the store instruction, base is the base of the store target, source is the "base" of the value input,
+       // mem is the input mem, t is the type in question, and offArg and offStore are the offsets from the respective bases.
+       storeOneArg := func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offArg, offStore int64) *Value {
+               w := common[selKey{source, offArg, t.Width, t}]
+               if w == nil {
+                       w = source.Block.NewValue0IA(source.Pos, OpArg, t, offArg, source.Aux)
+                       common[selKey{source, offArg, t.Width, t}] = w
+               }
+               return storeArgOrLoad(pos, b, base, w, mem, t, offStore)
+       }
+
+       // storeOneLoad creates a decomposed (one step) load that is then stored.
+       storeOneLoad := func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offArg, offStore int64) *Value {
+               from := offsetFrom(source.Args[0], offArg, types.NewPtr(t))
+               w := source.Block.NewValue2(source.Pos, OpLoad, t, from, mem)
+               return storeArgOrLoad(pos, b, base, w, mem, t, offStore)
+       }
+
+       storeTwoArg := func(pos src.XPos, b *Block, base, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64) *Value {
+               mem = storeOneArg(pos, b, base, source, mem, t1, offArg, offStore)
+               pos = pos.WithNotStmt()
+               t1Size := t1.Size()
+               return storeOneArg(pos, b, base, source, mem, t2, offArg+t1Size, offStore+t1Size)
+       }
+
+       storeTwoLoad := func(pos src.XPos, b *Block, base, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64) *Value {
+               mem = storeOneLoad(pos, b, base, source, mem, t1, offArg, offStore)
+               pos = pos.WithNotStmt()
+               t1Size := t1.Size()
+               return storeOneLoad(pos, b, base, source, mem, t2, offArg+t1Size, offStore+t1Size)
+       }
+
+       storeArgOrLoad = func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offset int64) *Value {
                if debug {
-                       fmt.Printf("\tstoreArg(%s;  %s;  %v;  %d;  %s)\n", b, a.LongString(), t, offset, mem.String())
+                       fmt.Printf("\tstoreArgOrLoad(%s;  %s;  %s;  %s; %d)\n", base.LongString(), source.LongString(), mem.String(), t.String(), offset)
                }
 
-               switch a.Op {
+               switch source.Op {
+               case OpCopy:
+                       return storeArgOrLoad(pos, b, base, source.Args[0], mem, t, offset)
+
+               case OpLoad:
+                       ret := decomposeArgOrLoad(pos, b, base, source, mem, t, offset, storeOneLoad, storeTwoLoad)
+                       if ret != nil {
+                               return ret
+                       }
+
+               case OpArg:
+                       ret := decomposeArgOrLoad(pos, b, base, source, mem, t, offset, storeOneArg, storeTwoArg)
+                       if ret != nil {
+                               return ret
+                       }
+
                case OpArrayMake0, OpStructMake0:
                        return mem
 
                case OpStructMake1, OpStructMake2, OpStructMake3, OpStructMake4:
                        for i := 0; i < t.NumFields(); i++ {
                                fld := t.Field(i)
-                               mem = storeArg(pos, b, a.Args[i], fld.Type, offset+fld.Offset, mem)
+                               mem = storeArgOrLoad(pos, b, base, source.Args[i], mem, fld.Type, offset+fld.Offset)
+                               pos = pos.WithNotStmt()
                        }
                        return mem
 
                case OpArrayMake1:
-                       return storeArg(pos, b, a.Args[0], t.Elem(), offset, mem)
+                       return storeArgOrLoad(pos, b, base, source.Args[0], mem, t.Elem(), offset)
 
                case OpInt64Make:
                        tHi, tLo := intPairTypes(t.Etype)
-                       mem = storeArg(pos, b, a.Args[0], tHi, offset+hiOffset, mem)
-                       return storeArg(pos, b, a.Args[1], tLo, offset+lowOffset, mem)
+                       mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, tHi, offset+hiOffset)
+                       pos = pos.WithNotStmt()
+                       return storeArgOrLoad(pos, b, base, source.Args[1], mem, tLo, offset+lowOffset)
 
                case OpComplexMake:
                        tPart := typ.Float32
@@ -305,59 +496,45 @@ func expandCalls(f *Func) {
                        if wPart == 8 {
                                tPart = typ.Float64
                        }
-                       mem = storeArg(pos, b, a.Args[0], tPart, offset, mem)
-                       return storeArg(pos, b, a.Args[1], tPart, offset+wPart, mem)
+                       mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, tPart, offset)
+                       pos = pos.WithNotStmt()
+                       return storeArgOrLoad(pos, b, base, source.Args[1], mem, tPart, offset+wPart)
 
                case OpIMake:
-                       mem = storeArg(pos, b, a.Args[0], typ.Uintptr, offset, mem)
-                       return storeArg(pos, b, a.Args[1], typ.BytePtr, offset+ptrSize, mem)
+                       mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, typ.Uintptr, offset)
+                       pos = pos.WithNotStmt()
+                       return storeArgOrLoad(pos, b, base, source.Args[1], mem, typ.BytePtr, offset+ptrSize)
 
                case OpStringMake:
-                       mem = storeArg(pos, b, a.Args[0], typ.BytePtr, offset, mem)
-                       return storeArg(pos, b, a.Args[1], typ.Int, offset+ptrSize, mem)
+                       mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, typ.BytePtr, offset)
+                       pos = pos.WithNotStmt()
+                       return storeArgOrLoad(pos, b, base, source.Args[1], mem, typ.Int, offset+ptrSize)
 
                case OpSliceMake:
-                       mem = storeArg(pos, b, a.Args[0], typ.BytePtr, offset, mem)
-                       mem = storeArg(pos, b, a.Args[1], typ.Int, offset+ptrSize, mem)
-                       return storeArg(pos, b, a.Args[2], typ.Int, offset+2*ptrSize, mem)
+                       mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, typ.BytePtr, offset)
+                       pos = pos.WithNotStmt()
+                       mem = storeArgOrLoad(pos, b, base, source.Args[1], mem, typ.Int, offset+ptrSize)
+                       return storeArgOrLoad(pos, b, base, source.Args[2], mem, typ.Int, offset+2*ptrSize)
                }
 
-               dst := offsetFrom(sp, offset, types.NewPtr(t))
-               x := b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, a, mem)
-               if debug {
-                       fmt.Printf("\t\tstoreArg returns %s\n", x.LongString())
-               }
-               return x
-       }
-
-       // splitStore converts a store of an SSA-able aggregate into a series of smaller stores, emitting
-       // appropriate Struct/Array Select operations (which will soon go dead) to obtain the parts.
-       // This has to handle aggregate types that have already been lowered by an earlier phase.
-       var splitStore func(dest, source, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value
-       splitStore = func(dest, source, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value {
-               if debug {
-                       fmt.Printf("\tsplitStore(%s;  %s;  %s;  %s;  %v;  %d;  %v)\n", dest.LongString(), source.LongString(), mem.String(), v.LongString(), t, offset, firstStorePos)
-               }
-               pos := v.Pos.WithNotStmt()
+               // For nodes that cannot be taken apart -- OpSelectN, other structure selectors.
                switch t.Etype {
                case types.TARRAY:
                        elt := t.Elem()
-                       if t.NumElem() == 1 && t.Width == regSize && elt.Width == regSize {
+                       if source.Type != t && t.NumElem() == 1 && elt.Width == t.Width && t.Width == regSize {
                                t = removeTrivialWrapperTypes(t)
-                               if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY {
-                                       f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct/array in it")
-                               }
-                               break // handle the leaf type.
+                               // it could be a leaf type, but the "leaf" could be complex64 (for example)
+                               return storeArgOrLoad(pos, b, base, source, mem, t, offset)
                        }
                        for i := int64(0); i < t.NumElem(); i++ {
                                sel := source.Block.NewValue1I(pos, OpArraySelect, elt, i, source)
-                               mem = splitStore(dest, sel, mem, v, elt, offset+i*elt.Width, firstStorePos)
-                               firstStorePos = firstStorePos.WithNotStmt()
+                               mem = storeArgOrLoad(pos, b, base, sel, mem, elt, offset+i*elt.Width)
+                               pos = pos.WithNotStmt()
                        }
                        return mem
 
                case types.TSTRUCT:
-                       if t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize {
+                       if source.Type != t && t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize {
                                // This peculiar test deals with accesses to immediate interface data.
                                // It works okay because everything is the same size.
                                // Example code that triggers this can be found in go/constant/value.go, function ToComplex
@@ -377,16 +554,15 @@ func expandCalls(f *Func) {
                                // v139 is later stored as an intVal == struct{val *big.Int} which naively requires the fields of
                                // of a *uint8, which does not succeed.
                                t = removeTrivialWrapperTypes(t)
-
                                // it could be a leaf type, but the "leaf" could be complex64 (for example)
-                               return splitStore(dest, source, mem, v, t, offset, firstStorePos)
+                               return storeArgOrLoad(pos, b, base, source, mem, t, offset)
                        }
 
                        for i := 0; i < t.NumFields(); i++ {
                                fld := t.Field(i)
                                sel := source.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), source)
-                               mem = splitStore(dest, sel, mem, v, fld.Type, offset+fld.Offset, firstStorePos)
-                               firstStorePos = firstStorePos.WithNotStmt()
+                               mem = storeArgOrLoad(pos, b, base, sel, mem, fld.Type, offset+fld.Offset)
+                               pos = pos.WithNotStmt()
                        }
                        return mem
 
@@ -396,56 +572,55 @@ func expandCalls(f *Func) {
                        }
                        tHi, tLo := intPairTypes(t.Etype)
                        sel := source.Block.NewValue1(pos, OpInt64Hi, tHi, source)
-                       mem = splitStore(dest, sel, mem, v, tHi, offset+hiOffset, firstStorePos)
-                       firstStorePos = firstStorePos.WithNotStmt()
+                       mem = storeArgOrLoad(pos, b, base, sel, mem, tHi, offset+hiOffset)
+                       pos = pos.WithNotStmt()
                        sel = source.Block.NewValue1(pos, OpInt64Lo, tLo, source)
-                       return splitStore(dest, sel, mem, v, tLo, offset+lowOffset, firstStorePos)
+                       return storeArgOrLoad(pos, b, base, sel, mem, tLo, offset+lowOffset)
 
                case types.TINTER:
                        sel := source.Block.NewValue1(pos, OpITab, typ.BytePtr, source)
-                       mem = splitStore(dest, sel, mem, v, typ.BytePtr, offset, firstStorePos)
-                       firstStorePos = firstStorePos.WithNotStmt()
+                       mem = storeArgOrLoad(pos, b, base, sel, mem, typ.BytePtr, offset)
+                       pos = pos.WithNotStmt()
                        sel = source.Block.NewValue1(pos, OpIData, typ.BytePtr, source)
-                       return splitStore(dest, sel, mem, v, typ.BytePtr, offset+ptrSize, firstStorePos)
+                       return storeArgOrLoad(pos, b, base, sel, mem, typ.BytePtr, offset+ptrSize)
 
                case types.TSTRING:
                        sel := source.Block.NewValue1(pos, OpStringPtr, typ.BytePtr, source)
-                       mem = splitStore(dest, sel, mem, v, typ.BytePtr, offset, firstStorePos)
-                       firstStorePos = firstStorePos.WithNotStmt()
+                       mem = storeArgOrLoad(pos, b, base, sel, mem, typ.BytePtr, offset)
+                       pos = pos.WithNotStmt()
                        sel = source.Block.NewValue1(pos, OpStringLen, typ.Int, source)
-                       return splitStore(dest, sel, mem, v, typ.Int, offset+ptrSize, firstStorePos)
+                       return storeArgOrLoad(pos, b, base, sel, mem, typ.Int, offset+ptrSize)
 
                case types.TSLICE:
                        et := types.NewPtr(t.Elem())
                        sel := source.Block.NewValue1(pos, OpSlicePtr, et, source)
-                       mem = splitStore(dest, sel, mem, v, et, offset, firstStorePos)
-                       firstStorePos = firstStorePos.WithNotStmt()
+                       mem = storeArgOrLoad(pos, b, base, sel, mem, et, offset)
+                       pos = pos.WithNotStmt()
                        sel = source.Block.NewValue1(pos, OpSliceLen, typ.Int, source)
-                       mem = splitStore(dest, sel, mem, v, typ.Int, offset+ptrSize, firstStorePos)
+                       mem = storeArgOrLoad(pos, b, base, sel, mem, typ.Int, offset+ptrSize)
                        sel = source.Block.NewValue1(pos, OpSliceCap, typ.Int, source)
-                       return splitStore(dest, sel, mem, v, typ.Int, offset+2*ptrSize, firstStorePos)
+                       return storeArgOrLoad(pos, b, base, sel, mem, typ.Int, offset+2*ptrSize)
 
                case types.TCOMPLEX64:
                        sel := source.Block.NewValue1(pos, OpComplexReal, typ.Float32, source)
-                       mem = splitStore(dest, sel, mem, v, typ.Float32, offset, firstStorePos)
-                       firstStorePos = firstStorePos.WithNotStmt()
+                       mem = storeArgOrLoad(pos, b, base, sel, mem, typ.Float32, offset)
+                       pos = pos.WithNotStmt()
                        sel = source.Block.NewValue1(pos, OpComplexImag, typ.Float32, source)
-                       return splitStore(dest, sel, mem, v, typ.Float32, offset+4, firstStorePos)
+                       return storeArgOrLoad(pos, b, base, sel, mem, typ.Float32, offset+4)
 
                case types.TCOMPLEX128:
                        sel := source.Block.NewValue1(pos, OpComplexReal, typ.Float64, source)
-                       mem = splitStore(dest, sel, mem, v, typ.Float64, offset, firstStorePos)
-                       firstStorePos = firstStorePos.WithNotStmt()
+                       mem = storeArgOrLoad(pos, b, base, sel, mem, typ.Float64, offset)
+                       pos = pos.WithNotStmt()
                        sel = source.Block.NewValue1(pos, OpComplexImag, typ.Float64, source)
-                       return splitStore(dest, sel, mem, v, typ.Float64, offset+8, firstStorePos)
+                       return storeArgOrLoad(pos, b, base, sel, mem, typ.Float64, offset+8)
                }
-               // Default, including for aggregates whose single element exactly fills their container
-               // TODO this will be a problem for cast interfaces containing floats when we move to registers.
-               x := v.Block.NewValue3A(firstStorePos, OpStore, types.TypeMem, t, offsetFrom(dest, offset, types.NewPtr(t)), source, mem)
+
+               dst := offsetFrom(base, offset, types.NewPtr(t))
+               x := b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, source, mem)
                if debug {
-                       fmt.Printf("\t\tsplitStore returns %s\n", x.LongString())
+                       fmt.Printf("\t\tstoreArg returns %s\n", x.LongString())
                }
-
                return x
        }
 
@@ -490,7 +665,7 @@ func expandCalls(f *Func) {
                                if debug {
                                        fmt.Printf("storeArg %s, %v, %d\n", a.LongString(), aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI))
                                }
-                               mem = storeArg(pos, v.Block, a, aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI), mem)
+                               mem = storeArgOrLoad(pos, v.Block, sp, a, mem, aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI))
                        }
                }
                v.resetArgs()
@@ -523,7 +698,7 @@ func expandCalls(f *Func) {
                t := name.Type
                if isAlreadyExpandedAggregateType(t) {
                        for j, v := range f.NamedValues[name] {
-                               if v.Op == OpSelectN {
+                               if v.Op == OpSelectN || v.Op == OpArg && isAlreadyExpandedAggregateType(v.Type) {
                                        ns := namedSelects[v]
                                        namedSelects[v] = append(ns, namedVal{locIndex: i, valIndex: j})
                                }
@@ -531,17 +706,19 @@ func expandCalls(f *Func) {
                }
        }
 
-       // Step 1: any stores of aggregates remaining are believed to be sourced from call results.
+       // Step 1: any stores of aggregates remaining are believed to be sourced from call results or args.
        // Decompose those stores into a series of smaller stores, adding selection ops as necessary.
        for _, b := range f.Blocks {
                for _, v := range b.Values {
                        if v.Op == OpStore {
                                t := v.Aux.(*types.Type)
+                               source := v.Args[1]
+                               tSrc := source.Type
                                iAEATt := isAlreadyExpandedAggregateType(t)
+
                                if !iAEATt {
                                        // guarding against store immediate struct into interface data field -- store type is *uint8
                                        // TODO can this happen recursively?
-                                       tSrc := v.Args[1].Type
                                        iAEATt = isAlreadyExpandedAggregateType(tSrc)
                                        if iAEATt {
                                                t = tSrc
@@ -551,8 +728,8 @@ func expandCalls(f *Func) {
                                        if debug {
                                                fmt.Printf("Splitting store %s\n", v.LongString())
                                        }
-                                       dst, source, mem := v.Args[0], v.Args[1], v.Args[2]
-                                       mem = splitStore(dst, source, mem, v, t, 0, v.Pos)
+                                       dst, mem := v.Args[0], v.Args[2]
+                                       mem = storeArgOrLoad(v.Pos, b, dst, source, mem, t, 0)
                                        v.copyOf(mem)
                                }
                        }
@@ -579,7 +756,7 @@ func expandCalls(f *Func) {
                                OpInt64Hi, OpInt64Lo:
                                w := v.Args[0]
                                switch w.Op {
-                               case OpStructSelect, OpArraySelect, OpSelectN:
+                               case OpStructSelect, OpArraySelect, OpSelectN, OpArg:
                                        val2Preds[w] += 1
                                        if debug {
                                                fmt.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w])
@@ -595,6 +772,17 @@ func expandCalls(f *Func) {
                                        }
                                }
 
+                       case OpArg:
+                               if !isAlreadyExpandedAggregateType(v.Type) {
+                                       continue
+                               }
+                               if _, ok := val2Preds[v]; !ok {
+                                       val2Preds[v] = 0
+                                       if debug {
+                                               fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
+                                       }
+                               }
+
                        case OpSelectNAddr:
                                // Do these directly, there are no chains of selectors.
                                call := v.Args[0]
@@ -612,7 +800,6 @@ func expandCalls(f *Func) {
        // then forwards to rewrite selectors.
        //
        // All chains of selectors end up in same block as the call.
-       sdom := f.Sdom()
 
        // Compilation must be deterministic, so sort after extracting first zeroes from map.
        // Sorting allows dominators-last order within each batch,
@@ -640,8 +827,11 @@ func expandCalls(f *Func) {
                last = len(allOrdered)
                sort.SliceStable(toProcess, less)
                for _, v := range toProcess {
-                       w := v.Args[0]
                        delete(val2Preds, v)
+                       if v.Op == OpArg {
+                               continue // no Args[0], hence done.
+                       }
+                       w := v.Args[0]
                        n, ok := val2Preds[w]
                        if !ok {
                                continue
@@ -655,13 +845,19 @@ func expandCalls(f *Func) {
                }
        }
 
-       common := make(map[selKey]*Value)
+       common = make(map[selKey]*Value)
        // Rewrite duplicate selectors as copies where possible.
        for i := len(allOrdered) - 1; i >= 0; i-- {
                v := allOrdered[i]
+               if v.Op == OpArg {
+                       continue
+               }
                w := v.Args[0]
-               for w.Op == OpCopy {
-                       w = w.Args[0]
+               if w.Op == OpCopy {
+                       for w.Op == OpCopy {
+                               w = w.Args[0]
+                       }
+                       v.SetArg(0, w)
                }
                typ := v.Type
                if typ.IsMemory() {
@@ -691,7 +887,7 @@ func expandCalls(f *Func) {
                case OpComplexImag:
                        offset = size
                }
-               sk := selKey{from: w, size: size, offset: offset, typ: typ.Etype}
+               sk := selKey{from: w, size: size, offset: offset, typ: typ}
                dupe := common[sk]
                if dupe == nil {
                        common[sk] = v
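
A toy model of the leaf-to-root walk that rewriteSelect performs above (stand-in types; not the ssa package): each selector on the path contributes its field or element offset, and once the walk reaches an Arg root, the accumulated offset plus the leaf's type is enough to rebuild the leaf as a single primitive Arg at base+offset.

package main

import "fmt"

// node is a stand-in for an ssa.Value in this sketch.
type node struct {
	op     string // "Arg", "StructSelect", "ArraySelect", ...
	arg    *node  // the value being selected from (nil at the root)
	offset int64  // offset this selector adds (field or element offset)
	base   int64  // for an "Arg" root: its frame offset
}

// resolve walks from a leaf selector toward the root, accumulating offsets.
// If the chain ends at an Arg it returns the frame offset for the rebuilt leaf;
// otherwise it reports ok=false and the chain is left alone (phase ordering).
func resolve(leaf *node) (frameOffset int64, ok bool) {
	off := int64(0)
	for v := leaf; v != nil; v = v.arg {
		if v.op == "Arg" {
			return v.base + off, true
		}
		off += v.offset
	}
	return 0, false
}

func main() {
	arg := &node{op: "Arg", base: 16}
	sel := &node{op: "StructSelect", arg: arg, offset: 8} // field at offset 8
	fmt.Println(resolve(sel))                             // 24 true
}
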
diff --git a/src/cmd/compile/internal/ssa/gen/dec64.rules b/src/cmd/compile/internal/ssa/gen/dec64.rules
index 07607960fa04a8744d03ea12aedd7b958fc9f51b..9297ed8d2e0a04aeedb3d831210704c6f6205307 100644 (file)
                lo
                (Store {hi.Type} dst hi mem))
 
-(Arg {n} [off]) && is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned() =>
+// These are not enabled during decomposeBuiltin if late call expansion, but they are always enabled for softFloat
+(Arg {n} [off]) && is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin") =>
   (Int64Make
     (Arg <typ.Int32> {n} [off+4])
     (Arg <typ.UInt32> {n} [off]))
-(Arg {n} [off]) && is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned() =>
+(Arg {n} [off]) && is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")  =>
   (Int64Make
     (Arg <typ.UInt32> {n} [off+4])
     (Arg <typ.UInt32> {n} [off]))
 
-(Arg {n} [off]) && is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned() =>
+(Arg {n} [off]) && is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin") =>
   (Int64Make
     (Arg <typ.Int32> {n} [off])
     (Arg <typ.UInt32> {n} [off+4]))
-(Arg {n} [off]) && is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned() =>
+(Arg {n} [off]) && is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin") =>
   (Int64Make
     (Arg <typ.UInt32> {n} [off])
     (Arg <typ.UInt32> {n} [off+4]))
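
When these Arg rules do fire, the 64-bit value is rebuilt from two 32-bit halves whose offsets depend only on endianness; a small illustrative, standalone Go sketch of the offset choice:

package main

import "fmt"

// hiLoOffsets gives the offsets of the high and low 32-bit halves of a 64-bit
// argument at off, matching the Int64Make rules above: little-endian stores the
// low half first, big-endian the high half.
func hiLoOffsets(off int32, bigEndian bool) (hiOff, loOff int32) {
	if bigEndian {
		return off, off + 4
	}
	return off + 4, off
}

func main() {
	// Little-endian: a signed 64-bit arg at offset 0 becomes
	// (Int64Make (Arg <typ.Int32> [4]) (Arg <typ.UInt32> [0])).
	fmt.Println(hiLoOffsets(0, false)) // 4 0
}
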
diff --git a/src/cmd/compile/internal/ssa/rewritedec64.go b/src/cmd/compile/internal/ssa/rewritedec64.go
index 8b9753414f4a235f1aa02c18f613ae307d023ca5..c49bc8043e7f2b584a5bd19266b55a6a10514b58 100644 (file)
@@ -184,12 +184,12 @@ func rewriteValuedec64_OpArg(v *Value) bool {
        config := b.Func.Config
        typ := &b.Func.Config.Types
        // match: (Arg {n} [off])
-       // cond: is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned()
+       // cond: is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")
        // result: (Int64Make (Arg <typ.Int32> {n} [off+4]) (Arg <typ.UInt32> {n} [off]))
        for {
                off := auxIntToInt32(v.AuxInt)
                n := auxToSym(v.Aux)
-               if !(is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned()) {
+               if !(is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")) {
                        break
                }
                v.reset(OpInt64Make)
@@ -203,12 +203,12 @@ func rewriteValuedec64_OpArg(v *Value) bool {
                return true
        }
        // match: (Arg {n} [off])
-       // cond: is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned()
+       // cond: is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")
        // result: (Int64Make (Arg <typ.UInt32> {n} [off+4]) (Arg <typ.UInt32> {n} [off]))
        for {
                off := auxIntToInt32(v.AuxInt)
                n := auxToSym(v.Aux)
-               if !(is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned()) {
+               if !(is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")) {
                        break
                }
                v.reset(OpInt64Make)
@@ -222,12 +222,12 @@ func rewriteValuedec64_OpArg(v *Value) bool {
                return true
        }
        // match: (Arg {n} [off])
-       // cond: is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned()
+       // cond: is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")
        // result: (Int64Make (Arg <typ.Int32> {n} [off]) (Arg <typ.UInt32> {n} [off+4]))
        for {
                off := auxIntToInt32(v.AuxInt)
                n := auxToSym(v.Aux)
-               if !(is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned()) {
+               if !(is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")) {
                        break
                }
                v.reset(OpInt64Make)
@@ -241,12 +241,12 @@ func rewriteValuedec64_OpArg(v *Value) bool {
                return true
        }
        // match: (Arg {n} [off])
-       // cond: is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned()
+       // cond: is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")
        // result: (Int64Make (Arg <typ.UInt32> {n} [off]) (Arg <typ.UInt32> {n} [off+4]))
        for {
                off := auxIntToInt32(v.AuxInt)
                n := auxToSym(v.Aux)
-               if !(is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned()) {
+               if !(is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")) {
                        break
                }
                v.reset(OpInt64Make)
diff --git a/src/cmd/compile/internal/ssa/stackalloc.go b/src/cmd/compile/internal/ssa/stackalloc.go
index 76125851365ae08fb7203d9043589494333cfa85..406a3c3ea53faa46455823ee34309ddaa16be37f 100644 (file)
@@ -153,6 +153,9 @@ func (s *stackAllocState) stackalloc() {
                if v.Op != OpArg {
                        continue
                }
+               if v.Aux == nil {
+                       f.Fatalf("%s has nil Aux\n", v.LongString())
+               }
                loc := LocalSlot{N: v.Aux.(GCNode), Type: v.Type, Off: v.AuxInt}
                if f.pass.debug > stackDebug {
                        fmt.Printf("stackalloc %s to %s\n", v, loc)