From 5eb99120844c0494d655678262e1fb41949a2b99 Mon Sep 17 00:00:00 2001 From: David Chase Date: Fri, 5 Mar 2021 19:56:13 -0500 Subject: [PATCH] cmd/compile: fix OpArg decomposer for registers in expandCalls Includes test taken from https://github.com/golang/go/issues/44816#issuecomment-791618179 and improved debugging output. Updates #44816 Change-Id: I94aeb9c5255f175fe80727be29d218bad54bf7ea Reviewed-on: https://go-review.googlesource.com/c/go/+/299389 Trust: David Chase Reviewed-by: Cherry Zhang --- src/cmd/compile/internal/ssa/expand_calls.go | 162 +++++++++++++++---- test/abi/double_nested_struct.go | 9 +- test/abi/struct_lower_1.go | 30 ++++ test/abi/struct_lower_1.out | 1 + test/abi/too_big_to_ssa.go | 7 +- 5 files changed, 166 insertions(+), 43 deletions(-) create mode 100644 test/abi/struct_lower_1.go create mode 100644 test/abi/struct_lower_1.out diff --git a/src/cmd/compile/internal/ssa/expand_calls.go b/src/cmd/compile/internal/ssa/expand_calls.go index df135853fe..516ea42db9 100644 --- a/src/cmd/compile/internal/ssa/expand_calls.go +++ b/src/cmd/compile/internal/ssa/expand_calls.go @@ -78,7 +78,8 @@ func (rc *registerCursor) String() string { regs = regs + x.LongString() } } - return fmt.Sprintf("RCSR{storeDest=%v, regsLen=%d, nextSlice=%d, regValues=[%s], config=%v", dest, rc.regsLen, rc.nextSlice, regs, rc.config) + // not printing the config because that has not been useful + return fmt.Sprintf("RCSR{storeDest=%v, regsLen=%d, nextSlice=%d, regValues=[%s]}", dest, rc.regsLen, rc.nextSlice, regs) } // next effectively post-increments the register cursor; the receiver is advanced, @@ -189,6 +190,7 @@ type expandState struct { commonSelectors map[selKey]*Value // used to de-dupe selectors commonArgs map[selKey]*Value // used to de-dupe OpArg/OpArgIntReg/OpArgFloatReg memForCall map[ID]*Value // For a call, need to know the unique selector that gets the mem. + indentLevel int // Indentation for debugging recursion } // intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target @@ -267,6 +269,19 @@ func ParamAssignmentForArgName(f *Func, name *ir.Name) *abi.ABIParamAssignment { panic(fmt.Errorf("Did not match param %v in prInfo %+v", name, abiInfo.InParams())) } +// indent increments (or decrements) the indentation. +func (x *expandState) indent(n int) { + x.indentLevel += n +} + +// Printf does an indented fmt.Printf on te format and args. +func (x *expandState) Printf(format string, a ...interface{}) (n int, err error) { + if x.indentLevel > 0 { + fmt.Printf("%[1]*s", x.indentLevel, "") + } + return fmt.Printf(format, a...) +} + // Calls that need lowering have some number of inputs, including a memory input, // and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able. @@ -286,7 +301,9 @@ func ParamAssignmentForArgName(f *Func, name *ir.Name) *abi.ABIParamAssignment { // TODO when registers really arrive, must also decompose anything split across two registers or registers and memory. func (x *expandState) rewriteSelect(leaf *Value, selector *Value, offset int64, regOffset Abi1RO) []LocalSlot { if x.debug { - fmt.Printf("rewriteSelect(%s, %s, %d)\n", leaf.LongString(), selector.LongString(), offset) + x.indent(3) + defer x.indent(-3) + x.Printf("rewriteSelect(%s, %s, %d)\n", leaf.LongString(), selector.LongString(), offset) } var locs []LocalSlot leafType := leaf.Type @@ -308,7 +325,7 @@ func (x *expandState) rewriteSelect(leaf *Value, selector *Value, offset int64, x.f.Fatalf("Unexpected OpArg type, selector=%s, leaf=%s\n", selector.LongString(), leaf.LongString()) } if x.debug { - fmt.Printf("\tOpArg, break\n") + x.Printf("---OpArg, break\n") } break } @@ -427,7 +444,7 @@ func (x *expandState) rewriteSelect(leaf *Value, selector *Value, offset int64, w := call.Block.NewValue2(leaf.Pos, OpLoad, leafType, off, call) leaf.copyOf(w) if x.debug { - fmt.Printf("\tnew %s\n", w.LongString()) + x.Printf("---new %s\n", w.LongString()) } } } @@ -539,9 +556,86 @@ func (x *expandState) rewriteDereference(b *Block, base, a, mem *Value, offset, return mem } -// decomposeArgOrLoad is a helper for storeArgOrLoad. -// It decomposes a Load or an Arg into smaller parts, parameterized by the decomposeOne and decomposeTwo functions -// passed to it, and returns the new mem. +// decomposeArg is a helper for storeArgOrLoad. +// It decomposes a Load or an Arg into smaller parts and returns the new mem. +// If the type does not match one of the expected aggregate types, it returns nil instead. +// Parameters: +// pos -- the location of any generated code. +// b -- the block into which any generated code should normally be placed +// source -- the value, possibly an aggregate, to be stored. +// mem -- the mem flowing into this decomposition (loads depend on it, stores updated it) +// t -- the type of the value to be stored +// storeOffset -- if the value is stored in memory, it is stored at base (see storeRc) + storeOffset +// loadRegOffset -- regarding source as a value in registers, the register offset in ABI1. Meaningful only if source is OpArg. +// storeRc -- storeRC; if the value is stored in registers, this specifies the registers. +// StoreRc also identifies whether the target is registers or memory, and has the base for the store operation. +func (x *expandState) decomposeArg(pos src.XPos, b *Block, source, mem *Value, t *types.Type, storeOffset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value { + + pa := x.prAssignForArg(source) + if len(pa.Registers) > 0 { + // Handle the in-registers case directly + rts, offs := pa.RegisterTypesAndOffsets() + last := loadRegOffset + x.regWidth(t) + if offs[loadRegOffset] != 0 { + panic(fmt.Errorf("offset %d of requested register %d should be zero", offs[loadRegOffset], loadRegOffset)) + } + for i := loadRegOffset; i < last; i++ { + rt := rts[i] + off := offs[i] + w := x.commonArgs[selKey{source, off, rt.Width, rt}] + if w == nil { + w = x.newArgToMemOrRegs(source, w, off, i, rt, pos) + } + mem = x.storeArgOrLoad(pos, b, w, mem, rt, storeOffset+off, i, storeRc.next(rt)) + } + return mem + } + + u := source.Type + switch u.Kind() { + case types.TARRAY: + elem := u.Elem() + elemRO := x.regWidth(elem) + for i := int64(0); i < u.NumElem(); i++ { + elemOff := i * elem.Size() + mem = storeOneArg(x, pos, b, source, mem, elem, elemOff, storeOffset+elemOff, loadRegOffset, storeRc.next(elem)) + loadRegOffset += elemRO + pos = pos.WithNotStmt() + } + return mem + case types.TSTRUCT: + for i := 0; i < u.NumFields(); i++ { + fld := u.Field(i) + mem = storeOneArg(x, pos, b, source, mem, fld.Type, fld.Offset, storeOffset+fld.Offset, loadRegOffset, storeRc.next(fld.Type)) + loadRegOffset += x.regWidth(fld.Type) + pos = pos.WithNotStmt() + } + return mem + case types.TINT64, types.TUINT64: + if t.Width == x.regSize { + break + } + tHi, tLo := x.intPairTypes(t.Kind()) + mem = storeOneArg(x, pos, b, source, mem, tHi, x.hiOffset, storeOffset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo)) + pos = pos.WithNotStmt() + return storeOneArg(x, pos, b, source, mem, tLo, x.lowOffset, storeOffset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.loRo)) + case types.TINTER: + return storeTwoArg(x, pos, b, source, mem, x.typs.Uintptr, x.typs.BytePtr, 0, storeOffset, loadRegOffset, storeRc) + case types.TSTRING: + return storeTwoArg(x, pos, b, source, mem, x.typs.BytePtr, x.typs.Int, 0, storeOffset, loadRegOffset, storeRc) + case types.TCOMPLEX64: + return storeTwoArg(x, pos, b, source, mem, x.typs.Float32, x.typs.Float32, 0, storeOffset, loadRegOffset, storeRc) + case types.TCOMPLEX128: + return storeTwoArg(x, pos, b, source, mem, x.typs.Float64, x.typs.Float64, 0, storeOffset, loadRegOffset, storeRc) + case types.TSLICE: + mem = storeOneArg(x, pos, b, source, mem, x.typs.BytePtr, 0, storeOffset, loadRegOffset, storeRc.next(x.typs.BytePtr)) + return storeTwoArg(x, pos, b, source, mem, x.typs.Int, x.typs.Int, x.ptrSize, storeOffset+x.ptrSize, loadRegOffset+RO_slice_len, storeRc) + } + return nil +} + +// decomposeLoad is a helper for storeArgOrLoad. +// It decomposes a Load into smaller parts and returns the new mem. // If the type does not match one of the expected aggregate types, it returns nil instead. // Parameters: // pos -- the location of any generated code. @@ -555,11 +649,7 @@ func (x *expandState) rewriteDereference(b *Block, base, a, mem *Value, offset, // StoreRc also identifies whether the target is registers or memory, and has the base for the store operation. // // TODO -- this needs cleanup; it just works for SSA-able aggregates, and won't fully generalize to register-args aggregates. -func (x *expandState) decomposeArgOrLoad(pos src.XPos, b *Block, source, mem *Value, t *types.Type, offset int64, loadRegOffset Abi1RO, storeRc registerCursor, - // For decompose One and Two, the additional offArg provides the offset from the beginning of "source", if it is in memory. - // offStore is combined to base to obtain a store destionation, like "offset" of decomposeArgOrLoad - decomposeOne func(x *expandState, pos src.XPos, b *Block, source, mem *Value, t1 *types.Type, offArg, offStore int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value, - decomposeTwo func(x *expandState, pos src.XPos, b *Block, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value) *Value { +func (x *expandState) decomposeLoad(pos src.XPos, b *Block, source, mem *Value, t *types.Type, offset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value { u := source.Type switch u.Kind() { case types.TARRAY: @@ -567,7 +657,7 @@ func (x *expandState) decomposeArgOrLoad(pos src.XPos, b *Block, source, mem *Va elemRO := x.regWidth(elem) for i := int64(0); i < u.NumElem(); i++ { elemOff := i * elem.Size() - mem = decomposeOne(x, pos, b, source, mem, elem, elemOff, offset+elemOff, loadRegOffset, storeRc.next(elem)) + mem = storeOneLoad(x, pos, b, source, mem, elem, elemOff, offset+elemOff, loadRegOffset, storeRc.next(elem)) loadRegOffset += elemRO pos = pos.WithNotStmt() } @@ -575,7 +665,7 @@ func (x *expandState) decomposeArgOrLoad(pos src.XPos, b *Block, source, mem *Va case types.TSTRUCT: for i := 0; i < u.NumFields(); i++ { fld := u.Field(i) - mem = decomposeOne(x, pos, b, source, mem, fld.Type, fld.Offset, offset+fld.Offset, loadRegOffset, storeRc.next(fld.Type)) + mem = storeOneLoad(x, pos, b, source, mem, fld.Type, fld.Offset, offset+fld.Offset, loadRegOffset, storeRc.next(fld.Type)) loadRegOffset += x.regWidth(fld.Type) pos = pos.WithNotStmt() } @@ -585,20 +675,20 @@ func (x *expandState) decomposeArgOrLoad(pos src.XPos, b *Block, source, mem *Va break } tHi, tLo := x.intPairTypes(t.Kind()) - mem = decomposeOne(x, pos, b, source, mem, tHi, x.hiOffset, offset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo)) + mem = storeOneLoad(x, pos, b, source, mem, tHi, x.hiOffset, offset+x.hiOffset, loadRegOffset+x.hiRo, storeRc.plus(x.hiRo)) pos = pos.WithNotStmt() - return decomposeOne(x, pos, b, source, mem, tLo, x.lowOffset, offset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.loRo)) + return storeOneLoad(x, pos, b, source, mem, tLo, x.lowOffset, offset+x.lowOffset, loadRegOffset+x.loRo, storeRc.plus(x.loRo)) case types.TINTER: - return decomposeTwo(x, pos, b, source, mem, x.typs.Uintptr, x.typs.BytePtr, 0, offset, loadRegOffset, storeRc) + return storeTwoLoad(x, pos, b, source, mem, x.typs.Uintptr, x.typs.BytePtr, 0, offset, loadRegOffset, storeRc) case types.TSTRING: - return decomposeTwo(x, pos, b, source, mem, x.typs.BytePtr, x.typs.Int, 0, offset, loadRegOffset, storeRc) + return storeTwoLoad(x, pos, b, source, mem, x.typs.BytePtr, x.typs.Int, 0, offset, loadRegOffset, storeRc) case types.TCOMPLEX64: - return decomposeTwo(x, pos, b, source, mem, x.typs.Float32, x.typs.Float32, 0, offset, loadRegOffset, storeRc) + return storeTwoLoad(x, pos, b, source, mem, x.typs.Float32, x.typs.Float32, 0, offset, loadRegOffset, storeRc) case types.TCOMPLEX128: - return decomposeTwo(x, pos, b, source, mem, x.typs.Float64, x.typs.Float64, 0, offset, loadRegOffset, storeRc) + return storeTwoLoad(x, pos, b, source, mem, x.typs.Float64, x.typs.Float64, 0, offset, loadRegOffset, storeRc) case types.TSLICE: - mem = decomposeOne(x, pos, b, source, mem, x.typs.BytePtr, 0, offset, loadRegOffset, storeRc.next(x.typs.BytePtr)) - return decomposeTwo(x, pos, b, source, mem, x.typs.Int, x.typs.Int, x.ptrSize, offset+x.ptrSize, loadRegOffset+RO_slice_len, storeRc) + mem = storeOneLoad(x, pos, b, source, mem, x.typs.BytePtr, 0, offset, loadRegOffset, storeRc.next(x.typs.BytePtr)) + return storeTwoLoad(x, pos, b, source, mem, x.typs.Int, x.typs.Int, x.ptrSize, offset+x.ptrSize, loadRegOffset+RO_slice_len, storeRc) } return nil } @@ -642,7 +732,9 @@ func storeTwoLoad(x *expandState, pos src.XPos, b *Block, source, mem *Value, t1 // If it does not reach a Load or an Arg, nothing happens; this allows a little freedom in phase ordering. func (x *expandState) storeArgOrLoad(pos src.XPos, b *Block, source, mem *Value, t *types.Type, offset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value { if x.debug { - fmt.Printf("\tstoreArgOrLoad(%s; %s; %s; %d; %s)\n", source.LongString(), mem.String(), t.String(), offset, storeRc.String()) + x.indent(3) + defer x.indent(-3) + x.Printf("storeArgOrLoad(%s; %s; %s; %d; %s)\n", source.LongString(), mem.String(), t.String(), offset, storeRc.String()) } // Start with Opcodes that can be disassembled @@ -651,13 +743,13 @@ func (x *expandState) storeArgOrLoad(pos src.XPos, b *Block, source, mem *Value, return x.storeArgOrLoad(pos, b, source.Args[0], mem, t, offset, loadRegOffset, storeRc) case OpLoad, OpDereference: - ret := x.decomposeArgOrLoad(pos, b, source, mem, t, offset, loadRegOffset, storeRc, storeOneLoad, storeTwoLoad) + ret := x.decomposeLoad(pos, b, source, mem, t, offset, loadRegOffset, storeRc) if ret != nil { return ret } case OpArg: - ret := x.decomposeArgOrLoad(pos, b, source, mem, t, offset, loadRegOffset, storeRc, storeOneArg, storeTwoArg) + ret := x.decomposeArg(pos, b, source, mem, t, offset, loadRegOffset, storeRc) if ret != nil { return ret } @@ -823,7 +915,7 @@ func (x *expandState) storeArgOrLoad(pos src.XPos, b *Block, source, mem *Value, s = b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, source, mem) } if x.debug { - fmt.Printf("\t\tstoreArg returns %s, storeRc=%s\n", s.LongString(), storeRc.String()) + x.Printf("-->storeArg returns %s, storeRc=%s\n", s.LongString(), storeRc.String()) } return s } @@ -860,7 +952,7 @@ func (x *expandState) rewriteArgs(v *Value, firstArg int) (*Value, []*Value) { aOffset = aux.OffsetOfArg(auxI) } if x.debug { - fmt.Printf("storeArg %s, %v, %d\n", a.LongString(), aType, aOffset) + x.Printf("storeArg %s, %v, %d\n", a.LongString(), aType, aOffset) } rc.init(aRegs, aux.abiInfo, result, x.sp) mem = x.storeArgOrLoad(pos, v.Block, a, mem, aType, aOffset, 0, rc) @@ -910,7 +1002,7 @@ func expandCalls(f *Func) { } if x.debug { - fmt.Printf("\nexpandsCalls(%s)\n", f.Name) + x.Printf("\nexpandsCalls(%s)\n", f.Name) } // TODO if too slow, whole program iteration can be replaced w/ slices of appropriate values, accumulated in first loop here. @@ -1055,7 +1147,7 @@ func expandCalls(f *Func) { case OpStructSelect, OpArraySelect, OpSelectN, OpArg: val2Preds[w] += 1 if x.debug { - fmt.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w]) + x.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w]) } } fallthrough @@ -1064,7 +1156,7 @@ func expandCalls(f *Func) { if _, ok := val2Preds[v]; !ok { val2Preds[v] = 0 if x.debug { - fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v]) + x.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v]) } } @@ -1075,7 +1167,7 @@ func expandCalls(f *Func) { if _, ok := val2Preds[v]; !ok { val2Preds[v] = 0 if x.debug { - fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v]) + x.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v]) } } @@ -1203,7 +1295,7 @@ func expandCalls(f *Func) { for i, v := range allOrdered { if x.debug { b := v.Block - fmt.Printf("allOrdered[%d] = b%d, %s, uses=%d\n", i, b.ID, v.LongString(), v.Uses) + x.Printf("allOrdered[%d] = b%d, %s, uses=%d\n", i, b.ID, v.LongString(), v.Uses) } if v.Uses == 0 { v.reset(OpInvalid) @@ -1305,7 +1397,7 @@ func (x *expandState) rewriteArgToMemOrRegs(v *Value) *Value { // newArgToMemOrRegs either rewrites toReplace into an OpArg referencing memory or into an OpArgXXXReg to a register, // or rewrites it into a copy of the appropriate OpArgXXX. The actual OpArgXXX is determined by combining baseArg (an OpArg) -// with offset, regOffset, and t to determine which portion of it reference (either all or a part, in memory or in registers). +// with offset, regOffset, and t to determine which portion of it to reference (either all or a part, in memory or in registers). func (x *expandState) newArgToMemOrRegs(baseArg, toReplace *Value, offset int64, regOffset Abi1RO, t *types.Type, pos src.XPos) *Value { key := selKey{baseArg, offset, t.Width, t} w := x.commonArgs[key] @@ -1336,7 +1428,7 @@ func (x *expandState) newArgToMemOrRegs(baseArg, toReplace *Value, offset int64, w := baseArg.Block.NewValue0IA(pos, OpArg, t, auxInt, aux) x.commonArgs[key] = w if x.debug { - fmt.Printf("\tnew %s\n", w.LongString()) + x.Printf("---new %s\n", w.LongString()) } if toReplace != nil { toReplace.copyOf(w) @@ -1364,7 +1456,7 @@ func (x *expandState) newArgToMemOrRegs(baseArg, toReplace *Value, offset int64, } else { w := baseArg.Block.NewValue0IA(pos, op, t, auxInt, aux) if x.debug { - fmt.Printf("\tnew %s\n", w.LongString()) + x.Printf("---new %s\n", w.LongString()) } x.commonArgs[key] = w if toReplace != nil { diff --git a/test/abi/double_nested_struct.go b/test/abi/double_nested_struct.go index 70d8ea4bce..814341e701 100644 --- a/test/abi/double_nested_struct.go +++ b/test/abi/double_nested_struct.go @@ -8,7 +8,7 @@ // license that can be found in the LICENSE file. // wasm is excluded because the compiler chatter about register abi pragma ends up -// on stdout, and causes the expected output to not match. +// on stdout, and causes the expected output to not match. package main @@ -37,13 +37,12 @@ func H(spp stringPairPair) string { //go:registerparams //go:noinline -func G(a,b,c,d string) stringPairPair { - return stringPairPair{stringPair{a,b},stringPair{c,d}} +func G(a, b, c, d string) stringPairPair { + return stringPairPair{stringPair{a, b}, stringPair{c, d}} } - func main() { - spp := G("this","is","a","test") + spp := G("this", "is", "a", "test") s := H(spp) gotVsWant(s, "this is a test") } diff --git a/test/abi/struct_lower_1.go b/test/abi/struct_lower_1.go new file mode 100644 index 0000000000..b20de9be4b --- /dev/null +++ b/test/abi/struct_lower_1.go @@ -0,0 +1,30 @@ +// run + +//go:build !wasm +// +build !wasm + +// Copyright 2021 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package main + +import "fmt" + +//go:registerparams +//go:noinline +func passStruct6(a Struct6) Struct6 { + return a +} + +type Struct6 struct { + Struct1 +} + +type Struct1 struct { + A, B, C uint +} + +func main() { + fmt.Println(passStruct6(Struct6{Struct1{1, 2, 3}})) +} diff --git a/test/abi/struct_lower_1.out b/test/abi/struct_lower_1.out new file mode 100644 index 0000000000..d326cb6119 --- /dev/null +++ b/test/abi/struct_lower_1.out @@ -0,0 +1 @@ +{{1 2 3}} diff --git a/test/abi/too_big_to_ssa.go b/test/abi/too_big_to_ssa.go index a5c6abb0e4..6c55d31419 100644 --- a/test/abi/too_big_to_ssa.go +++ b/test/abi/too_big_to_ssa.go @@ -1,5 +1,6 @@ // run +//go:build !wasm // +build !wasm // Copyright 2021 The Go Authors. All rights reserved. @@ -16,7 +17,7 @@ var sink *string type toobig struct { // 6 words will not SSA but will fit in registers - a,b,c string + a, b, c string } //go:registerparams @@ -27,8 +28,8 @@ func H(x toobig) string { //go:registerparams //go:noinline -func I(a,b,c string) toobig { - return toobig{a,b,c} +func I(a, b, c string) toobig { + return toobig{a, b, c} } func main() { -- 2.50.0