From 15f01d6ae9853fd51ee8842d9af93d04ce25458c Mon Sep 17 00:00:00 2001
From: David Chase
Date: Tue, 13 Oct 2020 19:24:04 -0400
Subject: [PATCH] cmd/compile: delay expansion of OpArg until expand_calls

As it says, delay expansion of OpArg to the expand_calls phase, to
enable (eventually) interprocedural SSA optimizations, and (sooner)
change to a register ABI.

Includes a round of cleanup to function names and comments, largely
to match the expanded scope of the functions.

This CL removes the per-function dependence on GOSSAHASH, but the
go116lateCallExpansion kill switch remains (and was tested locally
to ensure it worked).

Two functions in expand_calls.go that performed overlapping things
were combined into a single function that is called twice.

Fixes #42236.
For #40724.

Change-Id: Icbb78947eaa39f17f2c1210d5c2caef20abd6571
Reviewed-on: https://go-review.googlesource.com/c/go/+/262117
Trust: David Chase
Run-TryBot: David Chase
Reviewed-by: Cherry Zhang
---
 src/cmd/compile/fmtmap_test.go               |   1 -
 src/cmd/compile/internal/gc/ssa.go           |  12 +-
 src/cmd/compile/internal/ssa/compile.go      |   2 +-
 src/cmd/compile/internal/ssa/config.go       |   4 +-
 src/cmd/compile/internal/ssa/expand_calls.go | 406 ++++++++++++++-----
 src/cmd/compile/internal/ssa/gen/dec64.rules |   9 +-
 src/cmd/compile/internal/ssa/rewritedec64.go |  16 +-
 src/cmd/compile/internal/ssa/stackalloc.go   |   3 +
 8 files changed, 329 insertions(+), 124 deletions(-)

diff --git a/src/cmd/compile/fmtmap_test.go b/src/cmd/compile/fmtmap_test.go
index f8c33ec1f9..179c60187f 100644
--- a/src/cmd/compile/fmtmap_test.go
+++ b/src/cmd/compile/fmtmap_test.go
@@ -136,7 +136,6 @@ var knownFormats = map[string]string{
 	"cmd/compile/internal/types.EType %s": "",
 	"cmd/compile/internal/types.EType %v": "",
 	"cmd/internal/obj.ABI %v":             "",
-	"cmd/internal/src.XPos %v":            "",
 	"error %v":                            "",
 	"float64 %.2f":                        "",
 	"float64 %.3f":                        "",
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index fb9d3e811a..45d628cc5e 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -409,11 +409,17 @@ func buildssa(fn *Node, worker int) *ssa.Func {
 
 	// Generate addresses of local declarations
 	s.decladdrs = map[*Node]*ssa.Value{}
+	var args []ssa.Param
+	var results []ssa.Param
 	for _, n := range fn.Func.Dcl {
 		switch n.Class() {
-		case PPARAM, PPARAMOUT:
+		case PPARAM:
+			s.decladdrs[n] = s.entryNewValue2A(ssa.OpLocalAddr, types.NewPtr(n.Type), n, s.sp, s.startmem)
+			args = append(args, ssa.Param{Type: n.Type, Offset: int32(n.Xoffset)})
+		case PPARAMOUT:
 			s.decladdrs[n] = s.entryNewValue2A(ssa.OpLocalAddr, types.NewPtr(n.Type), n, s.sp, s.startmem)
-			if n.Class() == PPARAMOUT && s.canSSA(n) {
+			results = append(results, ssa.Param{Type: n.Type, Offset: int32(n.Xoffset)})
+			if s.canSSA(n) {
 				// Save ssa-able PPARAMOUT variables so we can
 				// store them back to the stack at the end of
 				// the function.
@@ -4909,7 +4915,7 @@ func (s *state) canSSA(n *Node) bool {
 	if n.Class() == PPARAM && n.Sym != nil && n.Sym.Name == ".this" {
 		// wrappers generated by genwrapper need to update
 		// the .this pointer in place.
-		// TODO: treat as a PPARMOUT?
+		// TODO: treat as a PPARAMOUT?
 		return false
 	}
 	return canSSAType(n.Type)
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go
index bddd271273..9ddc53060c 100644
--- a/src/cmd/compile/internal/ssa/compile.go
+++ b/src/cmd/compile/internal/ssa/compile.go
@@ -429,7 +429,7 @@ var passes = [...]pass{
 	{name: "early copyelim", fn: copyelim},
 	{name: "early deadcode", fn: deadcode}, // remove generated dead code to avoid doing pointless work during opt
 	{name: "short circuit", fn: shortcircuit},
-	{name: "decompose args", fn: decomposeArgs, required: true},
+	{name: "decompose args", fn: decomposeArgs, required: !go116lateCallExpansion, disabled: go116lateCallExpansion}, // handled by late call lowering
 	{name: "decompose user", fn: decomposeUser, required: true},
 	{name: "pre-opt deadcode", fn: deadcode},
 	{name: "opt", fn: opt, required: true}, // NB: some generic rules know the name of the opt pass. TODO: split required rules and optimizing rules
diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go
index cb6f6fe7a1..0fe0337ddf 100644
--- a/src/cmd/compile/internal/ssa/config.go
+++ b/src/cmd/compile/internal/ssa/config.go
@@ -199,9 +199,9 @@ const (
 const go116lateCallExpansion = true
 
 // LateCallExpansionEnabledWithin returns true if late call expansion should be tested
-// within compilation of a function/method triggered by GOSSAHASH (defaults to "yes").
+// within compilation of a function/method.
 func LateCallExpansionEnabledWithin(f *Func) bool {
-	return go116lateCallExpansion && f.DebugTest // Currently set up for GOSSAHASH bug searches
+	return go116lateCallExpansion
 }
 
 // NewConfig returns a new configuration object for the given architecture.
diff --git a/src/cmd/compile/internal/ssa/expand_calls.go b/src/cmd/compile/internal/ssa/expand_calls.go
index 3e3573ff39..fbde19d94c 100644
--- a/src/cmd/compile/internal/ssa/expand_calls.go
+++ b/src/cmd/compile/internal/ssa/expand_calls.go
@@ -15,7 +15,7 @@ type selKey struct {
 	from   *Value
 	offset int64
 	size   int64
-	typ    types.EType
+	typ    *types.Type
 }
 
 type offsetKey struct {
@@ -27,7 +27,8 @@
 // expandCalls converts LE (Late Expansion) calls that act like they receive value args into a lower-level form
 // that is more oriented to a platform's ABI. The SelectN operations that extract results are rewritten into
 // more appropriate forms, and any StructMake or ArrayMake inputs are decomposed until non-struct values are
-// reached.
+// reached. On the callee side, OpArg nodes are not decomposed until this phase is run.
+// TODO results should not be lowered until this phase.
 func expandCalls(f *Func) {
 	// Calls that need lowering have some number of inputs, including a memory input,
 	// and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able.
@@ -42,6 +43,10 @@ func expandCalls(f *Func) {
 	}
 	debug := f.pass.debug > 0
 
+	if debug {
+		fmt.Printf("\nexpandsCalls(%s)\n", f.Name)
+	}
+
 	canSSAType := f.fe.CanSSA
 	regSize := f.Config.RegSize
 	sp, _ := f.spSb()
@@ -58,6 +63,10 @@ func expandCalls(f *Func) {
 
 	namedSelects := make(map[*Value][]namedVal)
 
+	sdom := f.Sdom()
+
+	common := make(map[selKey]*Value)
+
 	// intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target
 	// that has no 64-bit integer registers.
 	intPairTypes := func(et types.EType) (tHi, tLo *types.Type) {
@@ -107,6 +116,7 @@ func expandCalls(f *Func) {
 		return v
 	}
 
+	// splitSlots splits one "field" (specified by sfx, offset, and ty) out of the LocalSlots in ls and returns the new LocalSlots this generates.
 	splitSlots := func(ls []LocalSlot, sfx string, offset int64, ty *types.Type) []LocalSlot {
 		var locs []LocalSlot
 		for i := range ls {
@@ -147,21 +157,103 @@ func expandCalls(f *Func) {
 	// With the current ABI, the outputs need to be converted to loads, which will all use the call's
 	// memory output as their input.
 
-	// rewriteSelect recursively walks leaf selector to a root (OpSelectN) through
-	// a chain of Struct/Array Select operations. If the chain of selectors does not
-	// end in OpSelectN, it does nothing (this can happen depending on compiler phase ordering).
-	// It emits the code necessary to implement the leaf select operation that leads to the call.
+	// rewriteSelect recursively walks from leaf selector to a root (OpSelectN, OpLoad, OpArg)
+	// through a chain of Struct/Array/builtin Select operations. If the chain of selectors does not
+	// end in an expected root, it does nothing (this can happen depending on compiler phase ordering).
+	// The "leaf" provides the type, the root supplies the container, and the leaf-to-root path
+	// accumulates the offset.
+	// It emits the code necessary to implement the leaf select operation that leads to the root.
+	//
 	// TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
 	var rewriteSelect func(leaf *Value, selector *Value, offset int64) []LocalSlot
 	rewriteSelect = func(leaf *Value, selector *Value, offset int64) []LocalSlot {
+		if debug {
+			fmt.Printf("rewriteSelect(%s, %s, %d)\n", leaf.LongString(), selector.LongString(), offset)
+		}
 		var locs []LocalSlot
 		leafType := leaf.Type
+		if len(selector.Args) > 0 {
+			w := selector.Args[0]
+			if w.Op == OpCopy {
+				for w.Op == OpCopy {
+					w = w.Args[0]
+				}
+				selector.SetArg(0, w)
+			}
+		}
 		switch selector.Op {
-		case OpSelectN:
-			// TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there.
+		case OpArg:
+			if !isAlreadyExpandedAggregateType(selector.Type) {
+				if leafType == selector.Type { // OpIData leads us here, sometimes.
+					leaf.copyOf(selector)
+				} else {
+					f.Fatalf("Unexpected OpArg type, selector=%s, leaf=%s\n", selector.LongString(), leaf.LongString())
+				}
+				if debug {
+					fmt.Printf("\tOpArg, break\n")
+				}
+				break
+			}
+			if leaf.Op == OpIData {
+				leafType = removeTrivialWrapperTypes(leaf.Type)
+			}
+			aux := selector.Aux
+			auxInt := selector.AuxInt + offset
+			if leaf.Block == selector.Block {
+				leaf.reset(OpArg)
+				leaf.Aux = aux
+				leaf.AuxInt = auxInt
+				leaf.Type = leafType
+			} else {
+				w := selector.Block.NewValue0IA(leaf.Pos, OpArg, leafType, auxInt, aux)
+				leaf.copyOf(w)
+				if debug {
+					fmt.Printf("\tnew %s\n", w.LongString())
+				}
+			}
 			for _, s := range namedSelects[selector] {
 				locs = append(locs, f.Names[s.locIndex])
 			}
+
+		case OpLoad: // We end up here because of IData of immediate structures.
+			// Failure case:
+			// (note the failure case is very rare; w/o this case, make.bash and run.bash both pass, as well as
+			// the hard cases of building {syscall,math,math/cmplx,math/bits,go/constant} on ppc64le and mips-softfloat).
+			//
+			// GOSSAFUNC='(*dumper).dump' go build -gcflags=-l -tags=math_big_pure_go cmd/compile/internal/gc
+			// cmd/compile/internal/gc/dump.go:136:14: internal compiler error: '(*dumper).dump': not lowered: v827, StructSelect PTR PTR
+			// b2: ← b1
+			// v20 (+142) = StaticLECall <interface {},mem> {AuxCall{reflect.Value.Interface([reflect.Value,0])[interface {},24]}} [40] v8 v1
+			// v21 (142) = SelectN <mem> [1] v20
+			// v22 (142) = SelectN <interface {}> [0] v20
+			// b15: ← b8
+			// v71 (+143) = IData <Nodes> v22 (v[Nodes])
+			// v73 (+146) = StaticLECall <[]*Node,mem> {AuxCall{"".Nodes.Slice([Nodes,0])[[]*Node,8]}} [32] v71 v21
+			//
+			// translates (w/o the "case OpLoad:" above) to:
+			//
+			// b2: ← b1
+			// v20 (+142) = StaticCall <mem> {AuxCall{reflect.Value.Interface([reflect.Value,0])[interface {},24]}} [40] v715
+			// v23 (142) = Load <*uintptr> v19 v20
+			// v823 (142) = IsNonNil <bool> v23
+			// v67 (+143) = Load <*[]*Node> v880 v20
+			// b15: ← b8
+			// v827 (146) = StructSelect <*[]*Node> [0] v67
+			// v846 (146) = Store <mem> {*[]*Node} v769 v827 v20
+			// v73 (+146) = StaticCall <mem> {AuxCall{"".Nodes.Slice([Nodes,0])[[]*Node,8]}} [32] v846
+			// i.e., the struct select is generated and remains in because it is not applied to an actual structure.
+			// The OpLoad was created to load the single field of the IData
+			// This case removes that StructSelect.
+			if leafType != selector.Type {
+				f.Fatalf("Unexpected Load as selector, leaf=%s, selector=%s\n", leaf.LongString(), selector.LongString())
+			}
+			leaf.copyOf(selector)
+			for _, s := range namedSelects[selector] {
+				locs = append(locs, f.Names[s.locIndex])
+			}
+
+		case OpSelectN:
+			// TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there.
 			call := selector.Args[0]
 			aux := call.Aux.(*AuxCall)
 			which := selector.AuxInt
@@ -171,10 +263,6 @@ func expandCalls(f *Func) {
 			} else {
 				leafType := removeTrivialWrapperTypes(leaf.Type)
 				if canSSAType(leafType) {
-					for leafType.Etype == types.TSTRUCT && leafType.NumFields() == 1 {
-						// This may not be adequately general -- consider [1]etc but this is caused by immediate IDATA
-						leafType = leafType.Field(0).Type
-					}
 					pt := types.NewPtr(leafType)
 					off := offsetFrom(sp, offset+aux.OffsetOfResult(which), pt)
 					// Any selection right out of the arg area/registers has to be same Block as call, use call as mem input.
@@ -185,22 +273,29 @@ func expandCalls(f *Func) {
 					} else {
 						w := call.Block.NewValue2(leaf.Pos, OpLoad, leafType, off, call)
 						leaf.copyOf(w)
+						if debug {
+							fmt.Printf("\tnew %s\n", w.LongString())
+						}
+					}
+					for _, s := range namedSelects[selector] {
+						locs = append(locs, f.Names[s.locIndex])
 					}
 				} else {
 					f.Fatalf("Should not have non-SSA-able OpSelectN, selector=%s", selector.LongString())
 				}
 			}
+
 		case OpStructSelect:
 			w := selector.Args[0]
 			var ls []LocalSlot
-			if w.Type.Etype != types.TSTRUCT {
-				f.Fatalf("Bad type for w: v=%v; sel=%v; w=%v; ,f=%s\n", leaf.LongString(), selector.LongString(), w.LongString(), f.Name)
-				// Artifact of immediate interface idata
+			if w.Type.Etype != types.TSTRUCT { // IData artifact
 				ls = rewriteSelect(leaf, w, offset)
 			} else {
 				ls = rewriteSelect(leaf, w, offset+w.Type.FieldOff(int(selector.AuxInt)))
-				for _, l := range ls {
-					locs = append(locs, f.fe.SplitStruct(l, int(selector.AuxInt)))
+				if w.Op != OpIData {
+					for _, l := range ls {
+						locs = append(locs, f.fe.SplitStruct(l, int(selector.AuxInt)))
+					}
 				}
 			}
 
@@ -221,9 +316,7 @@ func expandCalls(f *Func) {
 		case OpStringPtr:
 			ls := rewriteSelect(leaf, selector.Args[0], offset)
 			locs = splitSlots(ls, ".ptr", 0, typ.BytePtr)
-			//for i := range ls {
-			//	locs = append(locs, f.fe.SplitSlot(&ls[i], ".ptr", 0, typ.BytePtr))
-			//}
+
 		case OpSlicePtr:
 			w := selector.Args[0]
 			ls := rewriteSelect(leaf, w, offset)
@@ -272,32 +365,130 @@ func expandCalls(f *Func) {
 		return locs
 	}
 
-	// storeArg converts stores of SSA-able aggregate arguments (passed to a call) into a series of stores of
-	// smaller types into individual parameter slots.
-	var storeArg func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value
-	storeArg = func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value {
+	// storeArgOrLoad converts stores of SSA-able aggregate arguments (passed to a call) into a series of primitive-typed
+	// stores of non-aggregate types. It recursively walks up a chain of selectors until it reaches a Load or an Arg.
+	// If it does not reach a Load or an Arg, nothing happens; this allows a little freedom in phase ordering.
+	var storeArgOrLoad func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offset int64) *Value
+
+	// decomposeArgOrLoad is a helper for storeArgOrLoad.
+	// It decomposes a Load or an Arg into smaller parts, parameterized by the decomposeOne and decomposeTwo functions
+	// passed to it, and returns the new mem. If the type does not match one of the expected aggregate types, it returns nil instead.
+	decomposeArgOrLoad := func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offset int64,
+		decomposeOne func(pos src.XPos, b *Block, base, source, mem *Value, t1 *types.Type, offArg, offStore int64) *Value,
+		decomposeTwo func(pos src.XPos, b *Block, base, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64) *Value) *Value {
+		u := source.Type
+		switch u.Etype {
+		case types.TARRAY:
+			elem := u.Elem()
+			for i := int64(0); i < u.NumElem(); i++ {
+				elemOff := i * elem.Size()
+				mem = decomposeOne(pos, b, base, source, mem, elem, source.AuxInt+elemOff, offset+elemOff)
+				pos = pos.WithNotStmt()
+			}
+			return mem
+		case types.TSTRUCT:
+			for i := 0; i < u.NumFields(); i++ {
+				fld := u.Field(i)
+				mem = decomposeOne(pos, b, base, source, mem, fld.Type, source.AuxInt+fld.Offset, offset+fld.Offset)
+				pos = pos.WithNotStmt()
+			}
+			return mem
+		case types.TINT64, types.TUINT64:
+			if t.Width == regSize {
+				break
+			}
+			tHi, tLo := intPairTypes(t.Etype)
+			mem = decomposeOne(pos, b, base, source, mem, tHi, source.AuxInt+hiOffset, offset+hiOffset)
+			pos = pos.WithNotStmt()
+			return decomposeOne(pos, b, base, source, mem, tLo, source.AuxInt+lowOffset, offset+lowOffset)
+		case types.TINTER:
+			return decomposeTwo(pos, b, base, source, mem, typ.Uintptr, typ.BytePtr, source.AuxInt, offset)
+		case types.TSTRING:
+			return decomposeTwo(pos, b, base, source, mem, typ.BytePtr, typ.Int, source.AuxInt, offset)
+		case types.TCOMPLEX64:
+			return decomposeTwo(pos, b, base, source, mem, typ.Float32, typ.Float32, source.AuxInt, offset)
+		case types.TCOMPLEX128:
+			return decomposeTwo(pos, b, base, source, mem, typ.Float64, typ.Float64, source.AuxInt, offset)
+		case types.TSLICE:
+			mem = decomposeTwo(pos, b, base, source, mem, typ.BytePtr, typ.Int, source.AuxInt, offset)
+			return decomposeOne(pos, b, base, source, mem, typ.Int, source.AuxInt+2*ptrSize, offset+2*ptrSize)
+		}
+		return nil
+	}
+
+	// storeOneArg creates a decomposed (one step) arg that is then stored.
+	// pos and b locate the store instruction, base is the base of the store target, source is the "base" of the value input,
+	// mem is the input mem, t is the type in question, and offArg and offStore are the offsets from the respective bases.
+	storeOneArg := func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offArg, offStore int64) *Value {
+		w := common[selKey{source, offArg, t.Width, t}]
+		if w == nil {
+			w = source.Block.NewValue0IA(source.Pos, OpArg, t, offArg, source.Aux)
+			common[selKey{source, offArg, t.Width, t}] = w
+		}
+		return storeArgOrLoad(pos, b, base, w, mem, t, offStore)
+	}
+
+	// storeOneLoad creates a decomposed (one step) load that is then stored.
+	storeOneLoad := func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offArg, offStore int64) *Value {
+		from := offsetFrom(source.Args[0], offArg, types.NewPtr(t))
+		w := source.Block.NewValue2(source.Pos, OpLoad, t, from, mem)
+		return storeArgOrLoad(pos, b, base, w, mem, t, offStore)
+	}
+
+	storeTwoArg := func(pos src.XPos, b *Block, base, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64) *Value {
+		mem = storeOneArg(pos, b, base, source, mem, t1, offArg, offStore)
+		pos = pos.WithNotStmt()
+		t1Size := t1.Size()
+		return storeOneArg(pos, b, base, source, mem, t2, offArg+t1Size, offStore+t1Size)
+	}
+
+	storeTwoLoad := func(pos src.XPos, b *Block, base, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64) *Value {
+		mem = storeOneLoad(pos, b, base, source, mem, t1, offArg, offStore)
+		pos = pos.WithNotStmt()
+		t1Size := t1.Size()
+		return storeOneLoad(pos, b, base, source, mem, t2, offArg+t1Size, offStore+t1Size)
+	}
+
+	storeArgOrLoad = func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offset int64) *Value {
 		if debug {
-			fmt.Printf("\tstoreArg(%s; %s; %v; %d; %s)\n", b, a.LongString(), t, offset, mem.String())
+			fmt.Printf("\tstoreArgOrLoad(%s; %s; %s; %s; %d)\n", base.LongString(), source.LongString(), mem.String(), t.String(), offset)
 		}
 
-		switch a.Op {
+		switch source.Op {
+		case OpCopy:
+			return storeArgOrLoad(pos, b, base, source.Args[0], mem, t, offset)
+
+		case OpLoad:
+			ret := decomposeArgOrLoad(pos, b, base, source, mem, t, offset, storeOneLoad, storeTwoLoad)
+			if ret != nil {
+				return ret
+			}
+
+		case OpArg:
+			ret := decomposeArgOrLoad(pos, b, base, source, mem, t, offset, storeOneArg, storeTwoArg)
+			if ret != nil {
+				return ret
+			}
+
 		case OpArrayMake0, OpStructMake0:
 			return mem
 
 		case OpStructMake1, OpStructMake2, OpStructMake3, OpStructMake4:
 			for i := 0; i < t.NumFields(); i++ {
 				fld := t.Field(i)
-				mem = storeArg(pos, b, a.Args[i], fld.Type, offset+fld.Offset, mem)
+				mem = storeArgOrLoad(pos, b, base, source.Args[i], mem, fld.Type, offset+fld.Offset)
+				pos = pos.WithNotStmt()
 			}
 			return mem
 
 		case OpArrayMake1:
-			return storeArg(pos, b, a.Args[0], t.Elem(), offset, mem)
+			return storeArgOrLoad(pos, b, base, source.Args[0], mem, t.Elem(), offset)
 
 		case OpInt64Make:
 			tHi, tLo := intPairTypes(t.Etype)
-			mem = storeArg(pos, b, a.Args[0], tHi, offset+hiOffset, mem)
-			return storeArg(pos, b, a.Args[1], tLo, offset+lowOffset, mem)
+			mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, tHi, offset+hiOffset)
+			pos = pos.WithNotStmt()
+			return storeArgOrLoad(pos, b, base, source.Args[1], mem, tLo, offset+lowOffset)
 
 		case OpComplexMake:
 			tPart := typ.Float32
@@ -305,59 +496,45 @@ func expandCalls(f *Func) {
 			if wPart == 8 {
 				tPart = typ.Float64
 			}
-			mem = storeArg(pos, b, a.Args[0], tPart, offset, mem)
-			return storeArg(pos, b, a.Args[1], tPart, offset+wPart, mem)
+			mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, tPart, offset)
+			pos = pos.WithNotStmt()
+			return storeArgOrLoad(pos, b, base, source.Args[1], mem, tPart, offset+wPart)
 
 		case OpIMake:
-			mem = storeArg(pos, b, a.Args[0], typ.Uintptr, offset, mem)
-			return storeArg(pos, b, a.Args[1], typ.BytePtr, offset+ptrSize, mem)
+			mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, typ.Uintptr, offset)
+			pos = pos.WithNotStmt()
+			return storeArgOrLoad(pos, b, base, source.Args[1], mem, typ.BytePtr, offset+ptrSize)
 
 		case OpStringMake:
-			mem = storeArg(pos, b, a.Args[0], typ.BytePtr, offset, mem)
-			return storeArg(pos, b, a.Args[1], typ.Int, offset+ptrSize, mem)
+			mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, typ.BytePtr, offset)
+			pos = pos.WithNotStmt()
+			return storeArgOrLoad(pos, b, base, source.Args[1], mem, typ.Int, offset+ptrSize)
 
 		case OpSliceMake:
-			mem = storeArg(pos, b, a.Args[0], typ.BytePtr, offset, mem)
-			mem = storeArg(pos, b, a.Args[1], typ.Int, offset+ptrSize, mem)
-			return storeArg(pos, b, a.Args[2], typ.Int, offset+2*ptrSize, mem)
+			mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, typ.BytePtr, offset)
+			pos = pos.WithNotStmt()
+			mem = storeArgOrLoad(pos, b, base, source.Args[1], mem, typ.Int, offset+ptrSize)
+			return storeArgOrLoad(pos, b, base, source.Args[2], mem, typ.Int, offset+2*ptrSize)
 		}
-		dst := offsetFrom(sp, offset, types.NewPtr(t))
-		x := b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, a, mem)
-		if debug {
-			fmt.Printf("\t\tstoreArg returns %s\n", x.LongString())
-		}
-		return x
-	}
-
-	// splitStore converts a store of an SSA-able aggregate into a series of smaller stores, emitting
-	// appropriate Struct/Array Select operations (which will soon go dead) to obtain the parts.
-	// This has to handle aggregate types that have already been lowered by an earlier phase.
-	var splitStore func(dest, source, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value
-	splitStore = func(dest, source, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value {
-		if debug {
-			fmt.Printf("\tsplitStore(%s; %s; %s; %s; %v; %d; %v)\n", dest.LongString(), source.LongString(), mem.String(), v.LongString(), t, offset, firstStorePos)
-		}
-		pos := v.Pos.WithNotStmt()
+		// For nodes that cannot be taken apart -- OpSelectN, other structure selectors.
 		switch t.Etype {
 		case types.TARRAY:
 			elt := t.Elem()
-			if t.NumElem() == 1 && t.Width == regSize && elt.Width == regSize {
+			if source.Type != t && t.NumElem() == 1 && elt.Width == t.Width && t.Width == regSize {
 				t = removeTrivialWrapperTypes(t)
-				if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY {
-					f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct/array in it")
-				}
-				break // handle the leaf type.
+				// it could be a leaf type, but the "leaf" could be complex64 (for example)
+				return storeArgOrLoad(pos, b, base, source, mem, t, offset)
 			}
 			for i := int64(0); i < t.NumElem(); i++ {
 				sel := source.Block.NewValue1I(pos, OpArraySelect, elt, i, source)
-				mem = splitStore(dest, sel, mem, v, elt, offset+i*elt.Width, firstStorePos)
-				firstStorePos = firstStorePos.WithNotStmt()
+				mem = storeArgOrLoad(pos, b, base, sel, mem, elt, offset+i*elt.Width)
+				pos = pos.WithNotStmt()
 			}
 			return mem
 
 		case types.TSTRUCT:
-			if t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize {
+			if source.Type != t && t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize {
 				// This peculiar test deals with accesses to immediate interface data.
 				// It works okay because everything is the same size.
 				// Example code that triggers this can be found in go/constant/value.go, function ToComplex
 				// ...
 				// v139 is later stored as an intVal == struct{val *big.Int} which naively requires the fields of
 				// of a *uint8, which does not succeed.
 				t = removeTrivialWrapperTypes(t)
-				// it could be a leaf type, but the "leaf" could be complex64 (for example)
-				return splitStore(dest, source, mem, v, t, offset, firstStorePos)
+				return storeArgOrLoad(pos, b, base, source, mem, t, offset)
 			}
 			for i := 0; i < t.NumFields(); i++ {
 				fld := t.Field(i)
 				sel := source.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), source)
-				mem = splitStore(dest, sel, mem, v, fld.Type, offset+fld.Offset, firstStorePos)
-				firstStorePos = firstStorePos.WithNotStmt()
+				mem = storeArgOrLoad(pos, b, base, sel, mem, fld.Type, offset+fld.Offset)
+				pos = pos.WithNotStmt()
 			}
 			return mem
 
@@ -396,56 +572,55 @@ func expandCalls(f *Func) {
 			}
 			tHi, tLo := intPairTypes(t.Etype)
 			sel := source.Block.NewValue1(pos, OpInt64Hi, tHi, source)
-			mem = splitStore(dest, sel, mem, v, tHi, offset+hiOffset, firstStorePos)
-			firstStorePos = firstStorePos.WithNotStmt()
+			mem = storeArgOrLoad(pos, b, base, sel, mem, tHi, offset+hiOffset)
+			pos = pos.WithNotStmt()
 			sel = source.Block.NewValue1(pos, OpInt64Lo, tLo, source)
-			return splitStore(dest, sel, mem, v, tLo, offset+lowOffset, firstStorePos)
+			return storeArgOrLoad(pos, b, base, sel, mem, tLo, offset+lowOffset)
 
 		case types.TINTER:
 			sel := source.Block.NewValue1(pos, OpITab, typ.BytePtr, source)
-			mem = splitStore(dest, sel, mem, v, typ.BytePtr, offset, firstStorePos)
-			firstStorePos = firstStorePos.WithNotStmt()
+			mem = storeArgOrLoad(pos, b, base, sel, mem, typ.BytePtr, offset)
+			pos = pos.WithNotStmt()
 			sel = source.Block.NewValue1(pos, OpIData, typ.BytePtr, source)
-			return splitStore(dest, sel, mem, v, typ.BytePtr, offset+ptrSize, firstStorePos)
+			return storeArgOrLoad(pos, b, base, sel, mem, typ.BytePtr, offset+ptrSize)
 
 		case types.TSTRING:
 			sel := source.Block.NewValue1(pos, OpStringPtr, typ.BytePtr, source)
-			mem = splitStore(dest, sel, mem, v, typ.BytePtr, offset, firstStorePos)
-			firstStorePos = firstStorePos.WithNotStmt()
+			mem = storeArgOrLoad(pos, b, base, sel, mem, typ.BytePtr, offset)
+			pos = pos.WithNotStmt()
 			sel = source.Block.NewValue1(pos, OpStringLen, typ.Int, source)
-			return splitStore(dest, sel, mem, v, typ.Int, offset+ptrSize, firstStorePos)
+			return storeArgOrLoad(pos, b, base, sel, mem, typ.Int, offset+ptrSize)
 
 		case types.TSLICE:
 			et := types.NewPtr(t.Elem())
 			sel := source.Block.NewValue1(pos, OpSlicePtr, et, source)
-			mem = splitStore(dest, sel, mem, v, et, offset, firstStorePos)
-			firstStorePos = firstStorePos.WithNotStmt()
+			mem = storeArgOrLoad(pos, b, base, sel, mem, et, offset)
+			pos = pos.WithNotStmt()
 			sel = source.Block.NewValue1(pos, OpSliceLen, typ.Int, source)
-			mem = splitStore(dest, sel, mem, v, typ.Int, offset+ptrSize, firstStorePos)
+			mem = storeArgOrLoad(pos, b, base, sel, mem, typ.Int, offset+ptrSize)
 			sel = source.Block.NewValue1(pos, OpSliceCap, typ.Int, source)
-			return splitStore(dest, sel, mem, v, typ.Int, offset+2*ptrSize, firstStorePos)
+			return storeArgOrLoad(pos, b, base, sel, mem, typ.Int, offset+2*ptrSize)
 
 		case types.TCOMPLEX64:
 			sel := source.Block.NewValue1(pos, OpComplexReal, typ.Float32, source)
-			mem = splitStore(dest, sel, mem, v, typ.Float32, offset, firstStorePos)
-			firstStorePos = firstStorePos.WithNotStmt()
+			mem = storeArgOrLoad(pos, b, base, sel, mem, typ.Float32, offset)
+			pos = pos.WithNotStmt()
 			sel = source.Block.NewValue1(pos, OpComplexImag, typ.Float32, source)
-			return splitStore(dest, sel, mem, v, typ.Float32, offset+4, firstStorePos)
+			return storeArgOrLoad(pos, b, base, sel, mem, typ.Float32, offset+4)
 
 		case types.TCOMPLEX128:
 			sel := source.Block.NewValue1(pos, OpComplexReal, typ.Float64, source)
-			mem = splitStore(dest, sel, mem, v, typ.Float64, offset, firstStorePos)
-			firstStorePos = firstStorePos.WithNotStmt()
+			mem = storeArgOrLoad(pos, b, base, sel, mem, typ.Float64, offset)
+			pos = pos.WithNotStmt()
 			sel = source.Block.NewValue1(pos, OpComplexImag, typ.Float64, source)
-			return splitStore(dest, sel, mem, v, typ.Float64, offset+8, firstStorePos)
+			return storeArgOrLoad(pos, b, base, sel, mem, typ.Float64, offset+8)
 		}
-		// Default, including for aggregates whose single element exactly fills their container
-		// TODO this will be a problem for cast interfaces containing floats when we move to registers.
-		x := v.Block.NewValue3A(firstStorePos, OpStore, types.TypeMem, t, offsetFrom(dest, offset, types.NewPtr(t)), source, mem)
+
+		dst := offsetFrom(base, offset, types.NewPtr(t))
+		x := b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, source, mem)
 		if debug {
-			fmt.Printf("\t\tsplitStore returns %s\n", x.LongString())
+			fmt.Printf("\t\tstoreArg returns %s\n", x.LongString())
 		}
-
 		return x
 	}
 
@@ -490,7 +665,7 @@ func expandCalls(f *Func) {
 				if debug {
 					fmt.Printf("storeArg %s, %v, %d\n", a.LongString(), aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI))
 				}
-				mem = storeArg(pos, v.Block, a, aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI), mem)
+				mem = storeArgOrLoad(pos, v.Block, sp, a, mem, aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI))
 			}
 		}
 		v.resetArgs()
@@ -523,7 +698,7 @@ func expandCalls(f *Func) {
 		t := name.Type
 		if isAlreadyExpandedAggregateType(t) {
 			for j, v := range f.NamedValues[name] {
-				if v.Op == OpSelectN {
+				if v.Op == OpSelectN || v.Op == OpArg && isAlreadyExpandedAggregateType(v.Type) {
 					ns := namedSelects[v]
 					namedSelects[v] = append(ns, namedVal{locIndex: i, valIndex: j})
 				}
@@ -531,17 +706,19 @@ func expandCalls(f *Func) {
 		}
 	}
 
-	// Step 1: any stores of aggregates remaining are believed to be sourced from call results.
+	// Step 1: any stores of aggregates remaining are believed to be sourced from call results or args.
 	// Decompose those stores into a series of smaller stores, adding selection ops as necessary.
 	for _, b := range f.Blocks {
 		for _, v := range b.Values {
 			if v.Op == OpStore {
 				t := v.Aux.(*types.Type)
+				source := v.Args[1]
+				tSrc := source.Type
 				iAEATt := isAlreadyExpandedAggregateType(t)
+
 				if !iAEATt {
 					// guarding against store immediate struct into interface data field -- store type is *uint8
 					// TODO can this happen recursively?
-					tSrc := v.Args[1].Type
 					iAEATt = isAlreadyExpandedAggregateType(tSrc)
 					if iAEATt {
 						t = tSrc
@@ -551,8 +728,8 @@ func expandCalls(f *Func) {
 					if debug {
 						fmt.Printf("Splitting store %s\n", v.LongString())
 					}
-					dst, source, mem := v.Args[0], v.Args[1], v.Args[2]
-					mem = splitStore(dst, source, mem, v, t, 0, v.Pos)
+					dst, mem := v.Args[0], v.Args[2]
+					mem = storeArgOrLoad(v.Pos, b, dst, source, mem, t, 0)
 					v.copyOf(mem)
 				}
 			}
@@ -579,7 +756,7 @@ func expandCalls(f *Func) {
 				OpInt64Hi, OpInt64Lo:
 				w := v.Args[0]
 				switch w.Op {
-				case OpStructSelect, OpArraySelect, OpSelectN:
+				case OpStructSelect, OpArraySelect, OpSelectN, OpArg:
 					val2Preds[w] += 1
 					if debug {
 						fmt.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w])
@@ -595,6 +772,17 @@ func expandCalls(f *Func) {
 					}
 				}
 
+			case OpArg:
+				if !isAlreadyExpandedAggregateType(v.Type) {
+					continue
+				}
+				if _, ok := val2Preds[v]; !ok {
+					val2Preds[v] = 0
+					if debug {
+						fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
+					}
+				}
+
 			case OpSelectNAddr:
 				// Do these directly, there are no chains of selectors.
 				call := v.Args[0]
@@ -612,7 +800,6 @@ func expandCalls(f *Func) {
 	// then forwards to rewrite selectors.
 	//
 	// All chains of selectors end up in same block as the call.
-	sdom := f.Sdom()
 
 	// Compilation must be deterministic, so sort after extracting first zeroes from map.
 	// Sorting allows dominators-last order within each batch,
@@ -640,8 +827,11 @@ func expandCalls(f *Func) {
 		last = len(allOrdered)
 		sort.SliceStable(toProcess, less)
 		for _, v := range toProcess {
-			w := v.Args[0]
 			delete(val2Preds, v)
+			if v.Op == OpArg {
+				continue // no Args[0], hence done.
+			}
+			w := v.Args[0]
 			n, ok := val2Preds[w]
 			if !ok {
 				continue
@@ -655,13 +845,19 @@ func expandCalls(f *Func) {
 		}
 	}
 
-	common := make(map[selKey]*Value)
+	common = make(map[selKey]*Value)
 	// Rewrite duplicate selectors as copies where possible.
 	for i := len(allOrdered) - 1; i >= 0; i-- {
 		v := allOrdered[i]
+		if v.Op == OpArg {
+			continue
+		}
 		w := v.Args[0]
-		for w.Op == OpCopy {
-			w = w.Args[0]
+		if w.Op == OpCopy {
+			for w.Op == OpCopy {
+				w = w.Args[0]
+			}
+			v.SetArg(0, w)
 		}
 		typ := v.Type
 		if typ.IsMemory() {
@@ -691,7 +887,7 @@ func expandCalls(f *Func) {
 		case OpComplexImag:
 			offset = size
 		}
-		sk := selKey{from: w, size: size, offset: offset, typ: typ.Etype}
+		sk := selKey{from: w, size: size, offset: offset, typ: typ}
 		dupe := common[sk]
 		if dupe == nil {
 			common[sk] = v
diff --git a/src/cmd/compile/internal/ssa/gen/dec64.rules b/src/cmd/compile/internal/ssa/gen/dec64.rules
index 07607960fa..9297ed8d2e 100644
--- a/src/cmd/compile/internal/ssa/gen/dec64.rules
+++ b/src/cmd/compile/internal/ssa/gen/dec64.rules
@@ -41,20 +41,21 @@
 		lo
 		(Store {hi.Type} dst hi mem))
 
-(Arg {n} [off]) && is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned() =>
+// These are not enabled during decomposeBuiltin if late call expansion, but they are always enabled for softFloat
+(Arg {n} [off]) && is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin") =>
   (Int64Make
     (Arg <typ.Int32> {n} [off+4])
     (Arg <typ.UInt32> {n} [off]))
 
-(Arg {n} [off]) && is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned() =>
+(Arg {n} [off]) && is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin") =>
   (Int64Make
     (Arg <typ.UInt32> {n} [off+4])
     (Arg <typ.UInt32> {n} [off]))
 
-(Arg {n} [off]) && is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned() =>
+(Arg {n} [off]) && is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin") =>
   (Int64Make
     (Arg <typ.Int32> {n} [off])
     (Arg <typ.UInt32> {n} [off+4]))
 
-(Arg {n} [off]) && is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned() =>
+(Arg {n} [off]) && is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin") =>
   (Int64Make
     (Arg <typ.UInt32> {n} [off])
     (Arg <typ.UInt32> {n} [off+4]))
diff --git a/src/cmd/compile/internal/ssa/rewritedec64.go b/src/cmd/compile/internal/ssa/rewritedec64.go
index 8b9753414f..c49bc8043e 100644
--- a/src/cmd/compile/internal/ssa/rewritedec64.go
+++ b/src/cmd/compile/internal/ssa/rewritedec64.go
@@ -184,12 +184,12 @@ func rewriteValuedec64_OpArg(v *Value) bool {
 	config := b.Func.Config
 	typ := &b.Func.Config.Types
 	// match: (Arg {n} [off])
-	// cond: is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned()
+	// cond: is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")
 	// result: (Int64Make (Arg <typ.Int32> {n} [off+4]) (Arg <typ.UInt32> {n} [off]))
 	for {
 		off := auxIntToInt32(v.AuxInt)
 		n := auxToSym(v.Aux)
-		if !(is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned()) {
+		if !(is64BitInt(v.Type) && !config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")) {
 			break
 		}
 		v.reset(OpInt64Make)
@@ -203,12 +203,12 @@ func rewriteValuedec64_OpArg(v *Value) bool {
 		return true
 	}
 	// match: (Arg {n} [off])
-	// cond: is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned()
+	// cond: is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")
 	// result: (Int64Make (Arg <typ.UInt32> {n} [off+4]) (Arg <typ.UInt32> {n} [off]))
 	for {
 		off := auxIntToInt32(v.AuxInt)
 		n := auxToSym(v.Aux)
-		if !(is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned()) {
+		if !(is64BitInt(v.Type) && !config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")) {
 			break
 		}
 		v.reset(OpInt64Make)
@@ -222,12 +222,12 @@ func rewriteValuedec64_OpArg(v *Value) bool {
 		return true
 	}
 	// match: (Arg {n} [off])
-	// cond: is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned()
+	// cond: is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")
 	// result: (Int64Make (Arg <typ.Int32> {n} [off]) (Arg <typ.UInt32> {n} [off+4]))
 	for {
 		off := auxIntToInt32(v.AuxInt)
 		n := auxToSym(v.Aux)
-		if !(is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned()) {
+		if !(is64BitInt(v.Type) && config.BigEndian && v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")) {
 			break
 		}
 		v.reset(OpInt64Make)
@@ -241,12 +241,12 @@ func rewriteValuedec64_OpArg(v *Value) bool {
 		return true
 	}
 	// match: (Arg {n} [off])
-	// cond: is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned()
+	// cond: is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")
 	// result: (Int64Make (Arg <typ.UInt32> {n} [off]) (Arg <typ.UInt32> {n} [off+4]))
 	for {
 		off := auxIntToInt32(v.AuxInt)
 		n := auxToSym(v.Aux)
-		if !(is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned()) {
+		if !(is64BitInt(v.Type) && config.BigEndian && !v.Type.IsSigned() && !(go116lateCallExpansion && b.Func.pass.name == "decompose builtin")) {
 			break
 		}
 		v.reset(OpInt64Make)
diff --git a/src/cmd/compile/internal/ssa/stackalloc.go b/src/cmd/compile/internal/ssa/stackalloc.go
index 7612585136..406a3c3ea5 100644
--- a/src/cmd/compile/internal/ssa/stackalloc.go
+++ b/src/cmd/compile/internal/ssa/stackalloc.go
@@ -153,6 +153,9 @@ func (s *stackAllocState) stackalloc() {
 		if v.Op != OpArg {
 			continue
 		}
+		if v.Aux == nil {
+			f.Fatalf("%s has nil Aux\n", v.LongString())
+		}
 		loc := LocalSlot{N: v.Aux.(GCNode), Type: v.Type, Off: v.AuxInt}
 		if f.pass.debug > stackDebug {
 			fmt.Printf("stackalloc %s to %s\n", v, loc)
-- 
2.48.1