From c3fe874f25ff55f73e4422bea7aa0b0e0e268f3e Mon Sep 17 00:00:00 2001 From: David Chase Date: Mon, 17 Aug 2020 16:57:22 -0400 Subject: [PATCH] cmd/compile: avoid generating CSEs; do all aggregates; maintain debug names This adds a pass to detect common selection operations, to avoid generating duplicates. Duplicate offsets are also detected. All aggregate types are now handled; there is some freedom in where expand_calls is run, though it must run before softfloat. Debug-name-maintenance is now incremental both in decompose builtin and in expand_calls; it might be good to push this into all the decompose passes. (this is a smash of 5 CLs that rewrote some of the same code several times to deal with phase-ordering problems, and included an abandoned attempt.) For #40724. Change-Id: I2a0c32f20660bf8b99e2bcecd33545d97d2bd3c6 Reviewed-on: https://go-review.googlesource.com/c/go/+/249458 Trust: David Chase Run-TryBot: David Chase TryBot-Result: Go Bot Reviewed-by: Cherry Zhang --- src/cmd/compile/fmtmap_test.go | 1 + src/cmd/compile/internal/gc/ssa.go | 104 +-- src/cmd/compile/internal/ssa/compile.go | 2 +- src/cmd/compile/internal/ssa/config.go | 1 + src/cmd/compile/internal/ssa/decompose.go | 87 +- src/cmd/compile/internal/ssa/expand_calls.go | 555 ++++++++++--- src/cmd/compile/internal/ssa/export_test.go | 4 + src/cmd/compile/internal/ssa/gen/dec64.rules | 137 ++-- src/cmd/compile/internal/ssa/rewritedec64.go | 796 +++++++++++++------ 9 files changed, 1163 insertions(+), 524 deletions(-) diff --git a/src/cmd/compile/fmtmap_test.go b/src/cmd/compile/fmtmap_test.go index 179c60187f..f8c33ec1f9 100644 --- a/src/cmd/compile/fmtmap_test.go +++ b/src/cmd/compile/fmtmap_test.go @@ -136,6 +136,7 @@ var knownFormats = map[string]string{ "cmd/compile/internal/types.EType %s": "", "cmd/compile/internal/types.EType %v": "", "cmd/internal/obj.ABI %v": "", + "cmd/internal/src.XPos %v": "", "error %v": "", "float64 %.2f": "", "float64 %.3f": "", diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 979a092ba1..f840ef4066 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -4740,7 +4740,7 @@ func (s *state) getClosureAndRcvr(fn *Node) (*ssa.Value, *ssa.Value) { s.nilCheck(itab) itabidx := fn.Xoffset + 2*int64(Widthptr) + 8 // offset of fun field in runtime.itab closure := s.newValue1I(ssa.OpOffPtr, s.f.Config.Types.UintptrPtr, itabidx, itab) - rcvr := s.newValue1(ssa.OpIData, types.Types[TUINTPTR], i) + rcvr := s.newValue1(ssa.OpIData, s.f.Config.Types.BytePtr, i) return closure, rcvr } @@ -6904,56 +6904,38 @@ func (e *ssafn) Auto(pos src.XPos, t *types.Type) ssa.GCNode { } func (e *ssafn) SplitString(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) { - n := name.N.(*Node) ptrType := types.NewPtr(types.Types[TUINT8]) lenType := types.Types[TINT] - if n.Class() == PAUTO && !n.Name.Addrtaken() { - // Split this string up into two separate variables. - p := e.splitSlot(&name, ".ptr", 0, ptrType) - l := e.splitSlot(&name, ".len", ptrType.Size(), lenType) - return p, l - } - // Return the two parts of the larger variable. - return ssa.LocalSlot{N: n, Type: ptrType, Off: name.Off}, ssa.LocalSlot{N: n, Type: lenType, Off: name.Off + int64(Widthptr)} + // Split this string up into two separate variables. 
+ p := e.SplitSlot(&name, ".ptr", 0, ptrType) + l := e.SplitSlot(&name, ".len", ptrType.Size(), lenType) + return p, l } func (e *ssafn) SplitInterface(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) { n := name.N.(*Node) u := types.Types[TUINTPTR] t := types.NewPtr(types.Types[TUINT8]) - if n.Class() == PAUTO && !n.Name.Addrtaken() { - // Split this interface up into two separate variables. - f := ".itab" - if n.Type.IsEmptyInterface() { - f = ".type" - } - c := e.splitSlot(&name, f, 0, u) // see comment in plive.go:onebitwalktype1. - d := e.splitSlot(&name, ".data", u.Size(), t) - return c, d + // Split this interface up into two separate variables. + f := ".itab" + if n.Type.IsEmptyInterface() { + f = ".type" } - // Return the two parts of the larger variable. - return ssa.LocalSlot{N: n, Type: u, Off: name.Off}, ssa.LocalSlot{N: n, Type: t, Off: name.Off + int64(Widthptr)} + c := e.SplitSlot(&name, f, 0, u) // see comment in plive.go:onebitwalktype1. + d := e.SplitSlot(&name, ".data", u.Size(), t) + return c, d } func (e *ssafn) SplitSlice(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot, ssa.LocalSlot) { - n := name.N.(*Node) ptrType := types.NewPtr(name.Type.Elem()) lenType := types.Types[TINT] - if n.Class() == PAUTO && !n.Name.Addrtaken() { - // Split this slice up into three separate variables. - p := e.splitSlot(&name, ".ptr", 0, ptrType) - l := e.splitSlot(&name, ".len", ptrType.Size(), lenType) - c := e.splitSlot(&name, ".cap", ptrType.Size()+lenType.Size(), lenType) - return p, l, c - } - // Return the three parts of the larger variable. - return ssa.LocalSlot{N: n, Type: ptrType, Off: name.Off}, - ssa.LocalSlot{N: n, Type: lenType, Off: name.Off + int64(Widthptr)}, - ssa.LocalSlot{N: n, Type: lenType, Off: name.Off + int64(2*Widthptr)} + p := e.SplitSlot(&name, ".ptr", 0, ptrType) + l := e.SplitSlot(&name, ".len", ptrType.Size(), lenType) + c := e.SplitSlot(&name, ".cap", ptrType.Size()+lenType.Size(), lenType) + return p, l, c } func (e *ssafn) SplitComplex(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) { - n := name.N.(*Node) s := name.Type.Size() / 2 var t *types.Type if s == 8 { @@ -6961,53 +6943,35 @@ func (e *ssafn) SplitComplex(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) } else { t = types.Types[TFLOAT32] } - if n.Class() == PAUTO && !n.Name.Addrtaken() { - // Split this complex up into two separate variables. - r := e.splitSlot(&name, ".real", 0, t) - i := e.splitSlot(&name, ".imag", t.Size(), t) - return r, i - } - // Return the two parts of the larger variable. - return ssa.LocalSlot{N: n, Type: t, Off: name.Off}, ssa.LocalSlot{N: n, Type: t, Off: name.Off + s} + r := e.SplitSlot(&name, ".real", 0, t) + i := e.SplitSlot(&name, ".imag", t.Size(), t) + return r, i } func (e *ssafn) SplitInt64(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) { - n := name.N.(*Node) var t *types.Type if name.Type.IsSigned() { t = types.Types[TINT32] } else { t = types.Types[TUINT32] } - if n.Class() == PAUTO && !n.Name.Addrtaken() { - // Split this int64 up into two separate variables. - if thearch.LinkArch.ByteOrder == binary.BigEndian { - return e.splitSlot(&name, ".hi", 0, t), e.splitSlot(&name, ".lo", t.Size(), types.Types[TUINT32]) - } - return e.splitSlot(&name, ".hi", t.Size(), t), e.splitSlot(&name, ".lo", 0, types.Types[TUINT32]) - } - // Return the two parts of the larger variable. 
if thearch.LinkArch.ByteOrder == binary.BigEndian { - return ssa.LocalSlot{N: n, Type: t, Off: name.Off}, ssa.LocalSlot{N: n, Type: types.Types[TUINT32], Off: name.Off + 4} + return e.SplitSlot(&name, ".hi", 0, t), e.SplitSlot(&name, ".lo", t.Size(), types.Types[TUINT32]) } - return ssa.LocalSlot{N: n, Type: t, Off: name.Off + 4}, ssa.LocalSlot{N: n, Type: types.Types[TUINT32], Off: name.Off} + return e.SplitSlot(&name, ".hi", t.Size(), t), e.SplitSlot(&name, ".lo", 0, types.Types[TUINT32]) } func (e *ssafn) SplitStruct(name ssa.LocalSlot, i int) ssa.LocalSlot { - n := name.N.(*Node) st := name.Type ft := st.FieldType(i) var offset int64 for f := 0; f < i; f++ { offset += st.FieldType(f).Size() } - if n.Class() == PAUTO && !n.Name.Addrtaken() { - // Note: the _ field may appear several times. But - // have no fear, identically-named but distinct Autos are - // ok, albeit maybe confusing for a debugger. - return e.splitSlot(&name, "."+st.FieldName(i), offset, ft) - } - return ssa.LocalSlot{N: n, Type: ft, Off: name.Off + st.FieldOff(i)} + // Note: the _ field may appear several times. But + // have no fear, identically-named but distinct Autos are + // ok, albeit maybe confusing for a debugger. + return e.SplitSlot(&name, "."+st.FieldName(i), offset, ft) } func (e *ssafn) SplitArray(name ssa.LocalSlot) ssa.LocalSlot { @@ -7017,19 +6981,23 @@ func (e *ssafn) SplitArray(name ssa.LocalSlot) ssa.LocalSlot { e.Fatalf(n.Pos, "bad array size") } et := at.Elem() - if n.Class() == PAUTO && !n.Name.Addrtaken() { - return e.splitSlot(&name, "[0]", 0, et) - } - return ssa.LocalSlot{N: n, Type: et, Off: name.Off} + return e.SplitSlot(&name, "[0]", 0, et) } func (e *ssafn) DerefItab(it *obj.LSym, offset int64) *obj.LSym { return itabsym(it, offset) } -// splitSlot returns a slot representing the data of parent starting at offset. -func (e *ssafn) splitSlot(parent *ssa.LocalSlot, suffix string, offset int64, t *types.Type) ssa.LocalSlot { - s := &types.Sym{Name: parent.N.(*Node).Sym.Name + suffix, Pkg: localpkg} +// SplitSlot returns a slot representing the data of parent starting at offset. 
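+// If the parent slot is addressed or is not a local auto, it cannot truly be split; the returned
+// slot keeps the parent node with an adjusted offset.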
+func (e *ssafn) SplitSlot(parent *ssa.LocalSlot, suffix string, offset int64, t *types.Type) ssa.LocalSlot { + node := parent.N.(*Node) + + if node.Class() != PAUTO || node.Name.Addrtaken() { + // addressed things and non-autos retain their parents (i.e., cannot truly be split) + return ssa.LocalSlot{N: node, Type: t, Off: parent.Off + offset} + } + + s := &types.Sym{Name: node.Sym.Name + suffix, Pkg: localpkg} n := &Node{ Name: new(Name), diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go index 0664c0ba46..bddd271273 100644 --- a/src/cmd/compile/internal/ssa/compile.go +++ b/src/cmd/compile/internal/ssa/compile.go @@ -441,8 +441,8 @@ var passes = [...]pass{ {name: "nilcheckelim", fn: nilcheckelim}, {name: "prove", fn: prove}, {name: "early fuse", fn: fuseEarly}, - {name: "expand calls", fn: expandCalls, required: true}, {name: "decompose builtin", fn: decomposeBuiltIn, required: true}, + {name: "expand calls", fn: expandCalls, required: true}, {name: "softfloat", fn: softfloat, required: true}, {name: "late opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules {name: "dead auto elim", fn: elimDeadAutosGeneric}, diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go index f1a748309c..cb6f6fe7a1 100644 --- a/src/cmd/compile/internal/ssa/config.go +++ b/src/cmd/compile/internal/ssa/config.go @@ -149,6 +149,7 @@ type Frontend interface { SplitStruct(LocalSlot, int) LocalSlot SplitArray(LocalSlot) LocalSlot // array must be length 1 SplitInt64(LocalSlot) (LocalSlot, LocalSlot) // returns (hi, lo) + SplitSlot(parent *LocalSlot, suffix string, offset int64, t *types.Type) LocalSlot // DerefItab dereferences an itab function // entry, given the symbol of the itab and diff --git a/src/cmd/compile/internal/ssa/decompose.go b/src/cmd/compile/internal/ssa/decompose.go index ab27ba85ae..bf7f1e826b 100644 --- a/src/cmd/compile/internal/ssa/decompose.go +++ b/src/cmd/compile/internal/ssa/decompose.go @@ -6,6 +6,7 @@ package ssa import ( "cmd/compile/internal/types" + "sort" ) // decompose converts phi ops on compound builtin types into phi @@ -31,77 +32,79 @@ func decomposeBuiltIn(f *Func) { } // Split up named values into their components. + // accumulate old names for aggregates (that are decomposed) in toDelete for efficient bulk deletion, + // accumulate new LocalSlots in newNames for addition after the iteration. This decomposition is for + // builtin types with leaf components, and thus there is no need to reprocess the newly create LocalSlots. 
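+	// For example, a named string value s yields components s.ptr and s.len; the arguments of each
+	// OpStringMake carrying the name are recorded under the new names, and the old entry is queued in toDelete.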
+ var toDelete []namedVal var newNames []LocalSlot - for _, name := range f.Names { + for i, name := range f.Names { t := name.Type switch { case t.IsInteger() && t.Size() > f.Config.RegSize: hiName, loName := f.fe.SplitInt64(name) newNames = append(newNames, hiName, loName) - for _, v := range f.NamedValues[name] { + for j, v := range f.NamedValues[name] { if v.Op != OpInt64Make { continue } f.NamedValues[hiName] = append(f.NamedValues[hiName], v.Args[0]) f.NamedValues[loName] = append(f.NamedValues[loName], v.Args[1]) + toDelete = append(toDelete, namedVal{i, j}) } - delete(f.NamedValues, name) case t.IsComplex(): rName, iName := f.fe.SplitComplex(name) newNames = append(newNames, rName, iName) - for _, v := range f.NamedValues[name] { + for j, v := range f.NamedValues[name] { if v.Op != OpComplexMake { continue } f.NamedValues[rName] = append(f.NamedValues[rName], v.Args[0]) f.NamedValues[iName] = append(f.NamedValues[iName], v.Args[1]) - + toDelete = append(toDelete, namedVal{i, j}) } - delete(f.NamedValues, name) case t.IsString(): ptrName, lenName := f.fe.SplitString(name) newNames = append(newNames, ptrName, lenName) - for _, v := range f.NamedValues[name] { + for j, v := range f.NamedValues[name] { if v.Op != OpStringMake { continue } f.NamedValues[ptrName] = append(f.NamedValues[ptrName], v.Args[0]) f.NamedValues[lenName] = append(f.NamedValues[lenName], v.Args[1]) + toDelete = append(toDelete, namedVal{i, j}) } - delete(f.NamedValues, name) case t.IsSlice(): ptrName, lenName, capName := f.fe.SplitSlice(name) newNames = append(newNames, ptrName, lenName, capName) - for _, v := range f.NamedValues[name] { + for j, v := range f.NamedValues[name] { if v.Op != OpSliceMake { continue } f.NamedValues[ptrName] = append(f.NamedValues[ptrName], v.Args[0]) f.NamedValues[lenName] = append(f.NamedValues[lenName], v.Args[1]) f.NamedValues[capName] = append(f.NamedValues[capName], v.Args[2]) + toDelete = append(toDelete, namedVal{i, j}) } - delete(f.NamedValues, name) case t.IsInterface(): typeName, dataName := f.fe.SplitInterface(name) newNames = append(newNames, typeName, dataName) - for _, v := range f.NamedValues[name] { + for j, v := range f.NamedValues[name] { if v.Op != OpIMake { continue } f.NamedValues[typeName] = append(f.NamedValues[typeName], v.Args[0]) f.NamedValues[dataName] = append(f.NamedValues[dataName], v.Args[1]) + toDelete = append(toDelete, namedVal{i, j}) } - delete(f.NamedValues, name) case t.IsFloat(): // floats are never decomposed, even ones bigger than RegSize - newNames = append(newNames, name) case t.Size() > f.Config.RegSize: f.Fatalf("undecomposed named type %s %v", name, t) - default: - newNames = append(newNames, name) } } - f.Names = newNames + + deleteNamedVals(f, toDelete) + f.Names = append(f.Names, newNames...) 
} func decomposeBuiltInPhi(v *Value) { @@ -263,14 +266,20 @@ func decomposeUserArrayInto(f *Func, name LocalSlot, slots []LocalSlot) []LocalS f.Fatalf("array not of size 1") } elemName := f.fe.SplitArray(name) + var keep []*Value for _, v := range f.NamedValues[name] { if v.Op != OpArrayMake1 { + keep = append(keep, v) continue } f.NamedValues[elemName] = append(f.NamedValues[elemName], v.Args[0]) } - // delete the name for the array as a whole - delete(f.NamedValues, name) + if len(keep) == 0 { + // delete the name for the array as a whole + delete(f.NamedValues, name) + } else { + f.NamedValues[name] = keep + } if t.Elem().IsArray() { return decomposeUserArrayInto(f, elemName, slots) @@ -300,17 +309,23 @@ func decomposeUserStructInto(f *Func, name LocalSlot, slots []LocalSlot) []Local } makeOp := StructMakeOp(n) + var keep []*Value // create named values for each struct field for _, v := range f.NamedValues[name] { if v.Op != makeOp { + keep = append(keep, v) continue } for i := 0; i < len(fnames); i++ { f.NamedValues[fnames[i]] = append(f.NamedValues[fnames[i]], v.Args[i]) } } - // remove the name of the struct as a whole - delete(f.NamedValues, name) + if len(keep) == 0 { + // delete the name for the struct as a whole + delete(f.NamedValues, name) + } else { + f.NamedValues[name] = keep + } // now that this f.NamedValues contains values for the struct // fields, recurse into nested structs @@ -400,3 +415,35 @@ func StructMakeOp(nf int) Op { } panic("too many fields in an SSAable struct") } + +type namedVal struct { + locIndex, valIndex int // f.NamedValues[f.Names[locIndex]][valIndex] = key +} + +// deleteNamedVals removes particular values with debugger names from f's naming data structures +func deleteNamedVals(f *Func, toDelete []namedVal) { + // Arrange to delete from larger indices to smaller, to ensure swap-with-end deletion does not invalid pending indices. + sort.Slice(toDelete, func(i, j int) bool { + if toDelete[i].locIndex != toDelete[j].locIndex { + return toDelete[i].locIndex > toDelete[j].locIndex + } + return toDelete[i].valIndex > toDelete[j].valIndex + + }) + + // Get rid of obsolete names + for _, d := range toDelete { + loc := f.Names[d.locIndex] + vals := f.NamedValues[loc] + l := len(vals) - 1 + if l > 0 { + vals[d.valIndex] = vals[l] + f.NamedValues[loc] = vals[:l] + } else { + delete(f.NamedValues, loc) + l = len(f.Names) - 1 + f.Names[d.locIndex] = f.Names[l] + f.Names = f.Names[:l] + } + } +} diff --git a/src/cmd/compile/internal/ssa/expand_calls.go b/src/cmd/compile/internal/ssa/expand_calls.go index bbd9aeee51..3e3573ff39 100644 --- a/src/cmd/compile/internal/ssa/expand_calls.go +++ b/src/cmd/compile/internal/ssa/expand_calls.go @@ -11,27 +11,44 @@ import ( "sort" ) +type selKey struct { + from *Value + offset int64 + size int64 + typ types.EType +} + +type offsetKey struct { + from *Value + offset int64 + pt *types.Type +} + // expandCalls converts LE (Late Expansion) calls that act like they receive value args into a lower-level form // that is more oriented to a platform's ABI. The SelectN operations that extract results are rewritten into // more appropriate forms, and any StructMake or ArrayMake inputs are decomposed until non-struct values are -// reached (for now, Strings, Slices, Complex, and Interface are not decomposed because they are rewritten in -// a subsequent phase, but that may need to change for a register ABI in case one of those composite values is -// split between registers and memory). 
-// -// TODO: when it comes time to use registers, might want to include builtin selectors as well, but currently that happens in lower. +// reached. func expandCalls(f *Func) { + // Calls that need lowering have some number of inputs, including a memory input, + // and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able. + + // With the current ABI those inputs need to be converted into stores to memory, + // rethreading the call's memory input to the first, and the new call now receiving the last. + + // With the current ABI, the outputs need to be converted to loads, which will all use the call's + // memory output as their input. if !LateCallExpansionEnabledWithin(f) { return } + debug := f.pass.debug > 0 + canSSAType := f.fe.CanSSA regSize := f.Config.RegSize sp, _ := f.spSb() + typ := &f.Config.Types + ptrSize := f.Config.PtrSize - debug := f.pass.debug > 0 - - // For 32-bit, need to deal with decomposition of 64-bit integers - tUint32 := types.Types[types.TUINT32] - tInt32 := types.Types[types.TINT32] + // For 32-bit, need to deal with decomposition of 64-bit integers, which depends on endianness. var hiOffset, lowOffset int64 if f.Config.BigEndian { lowOffset = 4 @@ -39,25 +56,63 @@ func expandCalls(f *Func) { hiOffset = 4 } + namedSelects := make(map[*Value][]namedVal) + // intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target // that has no 64-bit integer registers. intPairTypes := func(et types.EType) (tHi, tLo *types.Type) { - tHi = tUint32 + tHi = typ.UInt32 if et == types.TINT64 { - tHi = tInt32 + tHi = typ.Int32 } - tLo = tUint32 + tLo = typ.UInt32 return } // isAlreadyExpandedAggregateType returns whether a type is an SSA-able "aggregate" (multiple register) type - // that was expanded in an earlier phase (small user-defined arrays and structs, lowered in decomposeUser). - // Other aggregate types are expanded in decomposeBuiltin, which comes later. + // that was expanded in an earlier phase (currently, expand_calls is intended to run after decomposeBuiltin, + // so this is all aggregate types -- small struct and array, complex, interface, string, slice, and 64-bit + // integer on 32-bit). isAlreadyExpandedAggregateType := func(t *types.Type) bool { if !canSSAType(t) { return false } - return t.IsStruct() || t.IsArray() || regSize == 4 && t.Size() > 4 && t.IsInteger() + return t.IsStruct() || t.IsArray() || t.IsComplex() || t.IsInterface() || t.IsString() || t.IsSlice() || + t.Size() > regSize && t.IsInteger() + } + + offsets := make(map[offsetKey]*Value) + + // offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP + // TODO should also optimize offsets from SB? 
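+	// Results are memoized in the offsets map, keyed by (base, offset, pointer type), so repeated
+	// selections of the same component reuse a single OpOffPtr value.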
+ offsetFrom := func(from *Value, offset int64, pt *types.Type) *Value { + if offset == 0 && from.Type == pt { // this is not actually likely + return from + } + // Simplify, canonicalize + for from.Op == OpOffPtr { + offset += from.AuxInt + from = from.Args[0] + } + if from == sp { + return f.ConstOffPtrSP(pt, offset, sp) + } + key := offsetKey{from, offset, pt} + v := offsets[key] + if v != nil { + return v + } + v = from.Block.NewValue1I(from.Pos.WithNotStmt(), OpOffPtr, pt, offset, from) + offsets[key] = v + return v + } + + splitSlots := func(ls []LocalSlot, sfx string, offset int64, ty *types.Type) []LocalSlot { + var locs []LocalSlot + for i := range ls { + locs = append(locs, f.fe.SplitSlot(&ls[i], sfx, offset, ty)) + } + return locs } // removeTrivialWrapperTypes unwraps layers of @@ -97,11 +152,16 @@ func expandCalls(f *Func) { // end in OpSelectN, it does nothing (this can happen depending on compiler phase ordering). // It emits the code necessary to implement the leaf select operation that leads to the call. // TODO when registers really arrive, must also decompose anything split across two registers or registers and memory. - var rewriteSelect func(leaf *Value, selector *Value, offset int64) - rewriteSelect = func(leaf *Value, selector *Value, offset int64) { + var rewriteSelect func(leaf *Value, selector *Value, offset int64) []LocalSlot + rewriteSelect = func(leaf *Value, selector *Value, offset int64) []LocalSlot { + var locs []LocalSlot + leafType := leaf.Type switch selector.Op { case OpSelectN: // TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there. + for _, s := range namedSelects[selector] { + locs = append(locs, f.Names[s.locIndex]) + } call := selector.Args[0] aux := call.Aux.(*AuxCall) which := selector.AuxInt @@ -110,9 +170,13 @@ func expandCalls(f *Func) { leaf.copyOf(call) } else { leafType := removeTrivialWrapperTypes(leaf.Type) - pt := types.NewPtr(leafType) if canSSAType(leafType) { - off := f.ConstOffPtrSP(pt, offset+aux.OffsetOfResult(which), sp) + for leafType.Etype == types.TSTRUCT && leafType.NumFields() == 1 { + // This may not be adequately general -- consider [1]etc but this is caused by immediate IDATA + leafType = leafType.Field(0).Type + } + pt := types.NewPtr(leafType) + off := offsetFrom(sp, offset+aux.OffsetOfResult(which), pt) // Any selection right out of the arg area/registers has to be same Block as call, use call as mem input. 
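+				// If the leaf is in a different block, the Load is created next to the call and the
+				// leaf becomes a copy of it.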
if leaf.Block == call.Block { leaf.reset(OpLoad) @@ -123,46 +187,110 @@ func expandCalls(f *Func) { leaf.copyOf(w) } } else { - panic("Should not have non-SSA-able OpSelectN") + f.Fatalf("Should not have non-SSA-able OpSelectN, selector=%s", selector.LongString()) } } case OpStructSelect: w := selector.Args[0] + var ls []LocalSlot if w.Type.Etype != types.TSTRUCT { - fmt.Printf("Bad type for w:\nv=%v\nsel=%v\nw=%v\n,f=%s\n", leaf.LongString(), selector.LongString(), w.LongString(), f.Name) + f.Fatalf("Bad type for w: v=%v; sel=%v; w=%v; ,f=%s\n", leaf.LongString(), selector.LongString(), w.LongString(), f.Name) + // Artifact of immediate interface idata + ls = rewriteSelect(leaf, w, offset) + } else { + ls = rewriteSelect(leaf, w, offset+w.Type.FieldOff(int(selector.AuxInt))) + for _, l := range ls { + locs = append(locs, f.fe.SplitStruct(l, int(selector.AuxInt))) + } } - rewriteSelect(leaf, w, offset+w.Type.FieldOff(int(selector.AuxInt))) + + case OpArraySelect: + w := selector.Args[0] + rewriteSelect(leaf, w, offset+selector.Type.Size()*selector.AuxInt) case OpInt64Hi: w := selector.Args[0] - rewriteSelect(leaf, w, offset+hiOffset) + ls := rewriteSelect(leaf, w, offset+hiOffset) + locs = splitSlots(ls, ".hi", hiOffset, leafType) case OpInt64Lo: w := selector.Args[0] - rewriteSelect(leaf, w, offset+lowOffset) + ls := rewriteSelect(leaf, w, offset+lowOffset) + locs = splitSlots(ls, ".lo", lowOffset, leafType) - case OpArraySelect: + case OpStringPtr: + ls := rewriteSelect(leaf, selector.Args[0], offset) + locs = splitSlots(ls, ".ptr", 0, typ.BytePtr) + //for i := range ls { + // locs = append(locs, f.fe.SplitSlot(&ls[i], ".ptr", 0, typ.BytePtr)) + //} + case OpSlicePtr: w := selector.Args[0] - rewriteSelect(leaf, w, offset+selector.Type.Size()*selector.AuxInt) + ls := rewriteSelect(leaf, w, offset) + locs = splitSlots(ls, ".ptr", 0, types.NewPtr(w.Type.Elem())) + + case OpITab: + w := selector.Args[0] + ls := rewriteSelect(leaf, w, offset) + sfx := ".itab" + if w.Type.IsEmptyInterface() { + sfx = ".type" + } + locs = splitSlots(ls, sfx, 0, typ.Uintptr) + + case OpComplexReal: + ls := rewriteSelect(leaf, selector.Args[0], offset) + locs = splitSlots(ls, ".real", 0, leafType) + + case OpComplexImag: + ls := rewriteSelect(leaf, selector.Args[0], offset+leafType.Width) // result is FloatNN, width of result is offset of imaginary part. + locs = splitSlots(ls, ".imag", leafType.Width, leafType) + + case OpStringLen, OpSliceLen: + ls := rewriteSelect(leaf, selector.Args[0], offset+ptrSize) + locs = splitSlots(ls, ".len", ptrSize, leafType) + + case OpIData: + ls := rewriteSelect(leaf, selector.Args[0], offset+ptrSize) + locs = splitSlots(ls, ".data", ptrSize, leafType) + + case OpSliceCap: + ls := rewriteSelect(leaf, selector.Args[0], offset+2*ptrSize) + locs = splitSlots(ls, ".cap", 2*ptrSize, leafType) + + case OpCopy: // If it's an intermediate result, recurse + locs = rewriteSelect(leaf, selector.Args[0], offset) + for _, s := range namedSelects[selector] { + // this copy may have had its own name, preserve that, too. + locs = append(locs, f.Names[s.locIndex]) + } + default: - // Ignore dead ends; on 32-bit, these can occur running before decompose builtins. + // Ignore dead ends. These can occur if this phase is run before decompose builtin (which is not intended, but allowed). } + + return locs } // storeArg converts stores of SSA-able aggregate arguments (passed to a call) into a series of stores of // smaller types into individual parameter slots. 
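+	// For example, an SSA-able slice argument is decomposed into three scalar stores: its pointer,
+	// its length, and its capacity at consecutive offsets.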
- // TODO when registers really arrive, must also decompose anything split across two registers or registers and memory. var storeArg func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value storeArg = func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value { + if debug { + fmt.Printf("\tstoreArg(%s; %s; %v; %d; %s)\n", b, a.LongString(), t, offset, mem.String()) + } + switch a.Op { case OpArrayMake0, OpStructMake0: return mem + case OpStructMake1, OpStructMake2, OpStructMake3, OpStructMake4: for i := 0; i < t.NumFields(); i++ { fld := t.Field(i) mem = storeArg(pos, b, a.Args[i], fld.Type, offset+fld.Offset, mem) } return mem + case OpArrayMake1: return storeArg(pos, b, a.Args[0], t.Elem(), offset, mem) @@ -170,55 +298,51 @@ func expandCalls(f *Func) { tHi, tLo := intPairTypes(t.Etype) mem = storeArg(pos, b, a.Args[0], tHi, offset+hiOffset, mem) return storeArg(pos, b, a.Args[1], tLo, offset+lowOffset, mem) + + case OpComplexMake: + tPart := typ.Float32 + wPart := t.Width / 2 + if wPart == 8 { + tPart = typ.Float64 + } + mem = storeArg(pos, b, a.Args[0], tPart, offset, mem) + return storeArg(pos, b, a.Args[1], tPart, offset+wPart, mem) + + case OpIMake: + mem = storeArg(pos, b, a.Args[0], typ.Uintptr, offset, mem) + return storeArg(pos, b, a.Args[1], typ.BytePtr, offset+ptrSize, mem) + + case OpStringMake: + mem = storeArg(pos, b, a.Args[0], typ.BytePtr, offset, mem) + return storeArg(pos, b, a.Args[1], typ.Int, offset+ptrSize, mem) + + case OpSliceMake: + mem = storeArg(pos, b, a.Args[0], typ.BytePtr, offset, mem) + mem = storeArg(pos, b, a.Args[1], typ.Int, offset+ptrSize, mem) + return storeArg(pos, b, a.Args[2], typ.Int, offset+2*ptrSize, mem) } - dst := f.ConstOffPtrSP(types.NewPtr(t), offset, sp) + + dst := offsetFrom(sp, offset, types.NewPtr(t)) x := b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, a, mem) if debug { - fmt.Printf("storeArg(%v) returns %s\n", a, x.LongString()) + fmt.Printf("\t\tstoreArg returns %s\n", x.LongString()) } return x } - // offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP - // TODO should also optimize offsets from SB? - offsetFrom := func(dst *Value, offset int64, t *types.Type) *Value { - pt := types.NewPtr(t) - if offset == 0 && dst.Type == pt { // this is not actually likely - return dst - } - if dst.Op != OpOffPtr { - return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, dst) - } - // Simplify OpOffPtr - from := dst.Args[0] - offset += dst.AuxInt - if from == sp { - return f.ConstOffPtrSP(pt, offset, sp) - } - return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, from) - } - // splitStore converts a store of an SSA-able aggregate into a series of smaller stores, emitting // appropriate Struct/Array Select operations (which will soon go dead) to obtain the parts. - var splitStore func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value - splitStore = func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value { - // TODO might be worth commoning up duplicate selectors, but since they go dead, maybe no point. + // This has to handle aggregate types that have already been lowered by an earlier phase. 
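+	// For example, a store of an interface value is split into a store of its type/itab word and a
+	// store of its data word.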
+ var splitStore func(dest, source, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value + splitStore = func(dest, source, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value { + if debug { + fmt.Printf("\tsplitStore(%s; %s; %s; %s; %v; %d; %v)\n", dest.LongString(), source.LongString(), mem.String(), v.LongString(), t, offset, firstStorePos) + } pos := v.Pos.WithNotStmt() switch t.Etype { - case types.TINT64, types.TUINT64: - if t.Width == regSize { - break - } - tHi, tLo := intPairTypes(t.Etype) - sel := src.Block.NewValue1(pos, OpInt64Hi, tHi, src) - mem = splitStore(dst, sel, mem, v, tHi, offset+hiOffset, firstStorePos) - firstStorePos = firstStorePos.WithNotStmt() - sel = src.Block.NewValue1(pos, OpInt64Lo, tLo, src) - return splitStore(dst, sel, mem, v, tLo, offset+lowOffset, firstStorePos) - case types.TARRAY: elt := t.Elem() - if src.Op == OpIData && t.NumElem() == 1 && t.Width == regSize && elt.Width == regSize { + if t.NumElem() == 1 && t.Width == regSize && elt.Width == regSize { t = removeTrivialWrapperTypes(t) if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY { f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct/array in it") @@ -226,13 +350,14 @@ func expandCalls(f *Func) { break // handle the leaf type. } for i := int64(0); i < t.NumElem(); i++ { - sel := src.Block.NewValue1I(pos, OpArraySelect, elt, i, src) - mem = splitStore(dst, sel, mem, v, elt, offset+i*elt.Width, firstStorePos) + sel := source.Block.NewValue1I(pos, OpArraySelect, elt, i, source) + mem = splitStore(dest, sel, mem, v, elt, offset+i*elt.Width, firstStorePos) firstStorePos = firstStorePos.WithNotStmt() } return mem + case types.TSTRUCT: - if src.Op == OpIData && t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize { + if t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize { // This peculiar test deals with accesses to immediate interface data. // It works okay because everything is the same size. // Example code that triggers this can be found in go/constant/value.go, function ToComplex @@ -240,26 +365,87 @@ func expandCalls(f *Func) { // v121 (+882) = StaticLECall {AuxCall{"".itof([intVal,0])[floatVal,8]}} [16] v119 v1 // This corresponds to the generic rewrite rule "(StructSelect [0] (IData x)) => (IData x)" // Guard against "struct{struct{*foo}}" + // Other rewriting phases create minor glitches when they transform IData, for instance the + // interface-typed Arg "x" of ToFloat in go/constant/value.go + // v6 (858) = Arg {x} (x[Value], x[Value]) + // is rewritten by decomposeArgs into + // v141 (858) = Arg {x} + // v139 (858) = Arg <*uint8> {x} [8] + // because of a type case clause on line 862 of go/constant/value.go + // case intVal: + // return itof(x) + // v139 is later stored as an intVal == struct{val *big.Int} which naively requires the fields of + // of a *uint8, which does not succeed. t = removeTrivialWrapperTypes(t) - if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY { - f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct/array in it") - } - break // handle the leaf type. 
+ + // it could be a leaf type, but the "leaf" could be complex64 (for example) + return splitStore(dest, source, mem, v, t, offset, firstStorePos) } + for i := 0; i < t.NumFields(); i++ { fld := t.Field(i) - sel := src.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), src) - mem = splitStore(dst, sel, mem, v, fld.Type, offset+fld.Offset, firstStorePos) + sel := source.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), source) + mem = splitStore(dest, sel, mem, v, fld.Type, offset+fld.Offset, firstStorePos) firstStorePos = firstStorePos.WithNotStmt() } return mem + + case types.TINT64, types.TUINT64: + if t.Width == regSize { + break + } + tHi, tLo := intPairTypes(t.Etype) + sel := source.Block.NewValue1(pos, OpInt64Hi, tHi, source) + mem = splitStore(dest, sel, mem, v, tHi, offset+hiOffset, firstStorePos) + firstStorePos = firstStorePos.WithNotStmt() + sel = source.Block.NewValue1(pos, OpInt64Lo, tLo, source) + return splitStore(dest, sel, mem, v, tLo, offset+lowOffset, firstStorePos) + + case types.TINTER: + sel := source.Block.NewValue1(pos, OpITab, typ.BytePtr, source) + mem = splitStore(dest, sel, mem, v, typ.BytePtr, offset, firstStorePos) + firstStorePos = firstStorePos.WithNotStmt() + sel = source.Block.NewValue1(pos, OpIData, typ.BytePtr, source) + return splitStore(dest, sel, mem, v, typ.BytePtr, offset+ptrSize, firstStorePos) + + case types.TSTRING: + sel := source.Block.NewValue1(pos, OpStringPtr, typ.BytePtr, source) + mem = splitStore(dest, sel, mem, v, typ.BytePtr, offset, firstStorePos) + firstStorePos = firstStorePos.WithNotStmt() + sel = source.Block.NewValue1(pos, OpStringLen, typ.Int, source) + return splitStore(dest, sel, mem, v, typ.Int, offset+ptrSize, firstStorePos) + + case types.TSLICE: + et := types.NewPtr(t.Elem()) + sel := source.Block.NewValue1(pos, OpSlicePtr, et, source) + mem = splitStore(dest, sel, mem, v, et, offset, firstStorePos) + firstStorePos = firstStorePos.WithNotStmt() + sel = source.Block.NewValue1(pos, OpSliceLen, typ.Int, source) + mem = splitStore(dest, sel, mem, v, typ.Int, offset+ptrSize, firstStorePos) + sel = source.Block.NewValue1(pos, OpSliceCap, typ.Int, source) + return splitStore(dest, sel, mem, v, typ.Int, offset+2*ptrSize, firstStorePos) + + case types.TCOMPLEX64: + sel := source.Block.NewValue1(pos, OpComplexReal, typ.Float32, source) + mem = splitStore(dest, sel, mem, v, typ.Float32, offset, firstStorePos) + firstStorePos = firstStorePos.WithNotStmt() + sel = source.Block.NewValue1(pos, OpComplexImag, typ.Float32, source) + return splitStore(dest, sel, mem, v, typ.Float32, offset+4, firstStorePos) + + case types.TCOMPLEX128: + sel := source.Block.NewValue1(pos, OpComplexReal, typ.Float64, source) + mem = splitStore(dest, sel, mem, v, typ.Float64, offset, firstStorePos) + firstStorePos = firstStorePos.WithNotStmt() + sel = source.Block.NewValue1(pos, OpComplexImag, typ.Float64, source) + return splitStore(dest, sel, mem, v, typ.Float64, offset+8, firstStorePos) } // Default, including for aggregates whose single element exactly fills their container // TODO this will be a problem for cast interfaces containing floats when we move to registers. 
- x := v.Block.NewValue3A(firstStorePos, OpStore, types.TypeMem, t, offsetFrom(dst, offset, t), src, mem) + x := v.Block.NewValue3A(firstStorePos, OpStore, types.TypeMem, t, offsetFrom(dest, offset, types.NewPtr(t)), source, mem) if debug { - fmt.Printf("splitStore(%v, %v, %v, %v) returns %s\n", dst, src, mem, v, x.LongString()) + fmt.Printf("\t\tsplitStore returns %s\n", x.LongString()) } + return x } @@ -286,21 +472,24 @@ func expandCalls(f *Func) { } // "Dereference" of addressed (probably not-SSA-eligible) value becomes Move // TODO this will be more complicated with registers in the picture. - src := a.Args[0] - dst := f.ConstOffPtrSP(src.Type, aux.OffsetOfArg(auxI), sp) + source := a.Args[0] + dst := f.ConstOffPtrSP(source.Type, aux.OffsetOfArg(auxI), sp) if a.Uses == 1 && a.Block == v.Block { a.reset(OpMove) a.Pos = pos a.Type = types.TypeMem a.Aux = aux.TypeOfArg(auxI) a.AuxInt = aux.SizeOfArg(auxI) - a.SetArgs3(dst, src, mem) + a.SetArgs3(dst, source, mem) mem = a } else { - mem = v.Block.NewValue3A(pos, OpMove, types.TypeMem, aux.TypeOfArg(auxI), dst, src, mem) + mem = v.Block.NewValue3A(pos, OpMove, types.TypeMem, aux.TypeOfArg(auxI), dst, source, mem) mem.AuxInt = aux.SizeOfArg(auxI) } } else { + if debug { + fmt.Printf("storeArg %s, %v, %d\n", a.LongString(), aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI)) + } mem = storeArg(pos, v.Block, a, aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI), mem) } } @@ -308,6 +497,8 @@ func expandCalls(f *Func) { return mem } + // TODO if too slow, whole program iteration can be replaced w/ slices of appropriate values, accumulated in first loop here. + // Step 0: rewrite the calls to convert incoming args to stores. for _, b := range f.Blocks { for _, v := range b.Values { @@ -328,15 +519,40 @@ func expandCalls(f *Func) { } } + for i, name := range f.Names { + t := name.Type + if isAlreadyExpandedAggregateType(t) { + for j, v := range f.NamedValues[name] { + if v.Op == OpSelectN { + ns := namedSelects[v] + namedSelects[v] = append(ns, namedVal{locIndex: i, valIndex: j}) + } + } + } + } + // Step 1: any stores of aggregates remaining are believed to be sourced from call results. // Decompose those stores into a series of smaller stores, adding selection ops as necessary. for _, b := range f.Blocks { for _, v := range b.Values { if v.Op == OpStore { t := v.Aux.(*types.Type) - if isAlreadyExpandedAggregateType(t) { - dst, src, mem := v.Args[0], v.Args[1], v.Args[2] - mem = splitStore(dst, src, mem, v, t, 0, v.Pos) + iAEATt := isAlreadyExpandedAggregateType(t) + if !iAEATt { + // guarding against store immediate struct into interface data field -- store type is *uint8 + // TODO can this happen recursively? + tSrc := v.Args[1].Type + iAEATt = isAlreadyExpandedAggregateType(tSrc) + if iAEATt { + t = tSrc + } + } + if iAEATt { + if debug { + fmt.Printf("Splitting store %s\n", v.LongString()) + } + dst, source, mem := v.Args[0], v.Args[1], v.Args[2] + mem = splitStore(dst, source, mem, v, t, 0, v.Pos) v.copyOf(mem) } } @@ -345,23 +561,32 @@ func expandCalls(f *Func) { val2Preds := make(map[*Value]int32) // Used to accumulate dependency graph of selection operations for topological ordering. - // Step 2: accumulate selection operations for rewrite in topological order. + // Step 2: transform or accumulate selection operations for rewrite in topological order. 
+ // + // Aggregate types that have already (in earlier phases) been transformed must be lowered comprehensively to finish + // the transformation (user-defined structs and arrays, slices, strings, interfaces, complex, 64-bit on 32-bit architectures), + // // Any select-for-addressing applied to call results can be transformed directly. - // TODO this is overkill; with the transformation of aggregate references into series of leaf references, it is only necessary to remember and recurse on the leaves. for _, b := range f.Blocks { for _, v := range b.Values { // Accumulate chains of selectors for processing in topological order switch v.Op { - case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo: + case OpStructSelect, OpArraySelect, + OpIData, OpITab, + OpStringPtr, OpStringLen, + OpSlicePtr, OpSliceLen, OpSliceCap, + OpComplexReal, OpComplexImag, + OpInt64Hi, OpInt64Lo: w := v.Args[0] switch w.Op { - case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo, OpSelectN: + case OpStructSelect, OpArraySelect, OpSelectN: val2Preds[w] += 1 if debug { fmt.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w]) } } fallthrough + case OpSelectN: if _, ok := val2Preds[v]; !ok { val2Preds[v] = 0 @@ -369,53 +594,153 @@ func expandCalls(f *Func) { fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v]) } } + case OpSelectNAddr: // Do these directly, there are no chains of selectors. call := v.Args[0] which := v.AuxInt aux := call.Aux.(*AuxCall) pt := v.Type - off := f.ConstOffPtrSP(pt, aux.OffsetOfResult(which), sp) + off := offsetFrom(sp, aux.OffsetOfResult(which), pt) v.copyOf(off) } } } - // Compilation must be deterministic - var ordered []*Value - less := func(i, j int) bool { return ordered[i].ID < ordered[j].ID } + // Step 3: Compute topological order of selectors, + // then process it in reverse to eliminate duplicates, + // then forwards to rewrite selectors. + // + // All chains of selectors end up in same block as the call. + sdom := f.Sdom() + + // Compilation must be deterministic, so sort after extracting first zeroes from map. + // Sorting allows dominators-last order within each batch, + // so that the backwards scan for duplicates will most often find copies from dominating blocks (it is best-effort). + var toProcess []*Value + less := func(i, j int) bool { + vi, vj := toProcess[i], toProcess[j] + bi, bj := vi.Block, vj.Block + if bi == bj { + return vi.ID < vj.ID + } + return sdom.domorder(bi) > sdom.domorder(bj) // reverse the order to put dominators last. + } - // Step 3: Rewrite in topological order. All chains of selectors end up in same block as the call. 
+ // Accumulate order in allOrdered + var allOrdered []*Value + for v, n := range val2Preds { + if n == 0 { + allOrdered = append(allOrdered, v) + } + } + last := 0 // allOrdered[0:last] has been top-sorted and processed for len(val2Preds) > 0 { - ordered = ordered[:0] - for v, n := range val2Preds { - if n == 0 { - ordered = append(ordered, v) + toProcess = allOrdered[last:] + last = len(allOrdered) + sort.SliceStable(toProcess, less) + for _, v := range toProcess { + w := v.Args[0] + delete(val2Preds, v) + n, ok := val2Preds[w] + if !ok { + continue } + if n == 1 { + allOrdered = append(allOrdered, w) + delete(val2Preds, w) + continue + } + val2Preds[w] = n - 1 } - sort.Slice(ordered, less) - for _, v := range ordered { - for { - w := v.Args[0] - if debug { - fmt.Printf("About to rewrite %s, args[0]=%s\n", v.LongString(), w.LongString()) - } - delete(val2Preds, v) - rewriteSelect(v, v, 0) - v = w - n, ok := val2Preds[v] - if !ok { - break - } - if n != 1 { - val2Preds[v] = n - 1 - break - } - // Loop on new v; val2Preds[v] == 1 will be deleted in that iteration, no need to store zero. + } + + common := make(map[selKey]*Value) + // Rewrite duplicate selectors as copies where possible. + for i := len(allOrdered) - 1; i >= 0; i-- { + v := allOrdered[i] + w := v.Args[0] + for w.Op == OpCopy { + w = w.Args[0] + } + typ := v.Type + if typ.IsMemory() { + continue // handled elsewhere, not an indexable result + } + size := typ.Width + offset := int64(0) + switch v.Op { + case OpStructSelect: + if w.Type.Etype == types.TSTRUCT { + offset = w.Type.FieldOff(int(v.AuxInt)) + } else { // Immediate interface data artifact, offset is zero. + f.Fatalf("Expand calls interface data problem, func %s, v=%s, w=%s\n", f.Name, v.LongString(), w.LongString()) } + case OpArraySelect: + offset = size * v.AuxInt + case OpSelectN: + offset = w.Aux.(*AuxCall).OffsetOfResult(v.AuxInt) + case OpInt64Hi: + offset = hiOffset + case OpInt64Lo: + offset = lowOffset + case OpStringLen, OpSliceLen, OpIData: + offset = ptrSize + case OpSliceCap: + offset = 2 * ptrSize + case OpComplexImag: + offset = size + } + sk := selKey{from: w, size: size, offset: offset, typ: typ.Etype} + dupe := common[sk] + if dupe == nil { + common[sk] = v + } else if sdom.IsAncestorEq(dupe.Block, v.Block) { + v.copyOf(dupe) + } else { + // Because values are processed in dominator order, the old common[s] will never dominate after a miss is seen. + // Installing the new value might match some future values. + common[sk] = v } } + // Indices of entries in f.Names that need to be deleted. + var toDelete []namedVal + + // Rewrite selectors. + for i, v := range allOrdered { + if debug { + b := v.Block + fmt.Printf("allOrdered[%d] = b%d, %s, uses=%d\n", i, b.ID, v.LongString(), v.Uses) + } + if v.Uses == 0 { + v.reset(OpInvalid) + continue + } + if v.Op == OpCopy { + continue + } + locs := rewriteSelect(v, v, 0) + // Install new names. + if v.Type.IsMemory() { + continue + } + // Leaf types may have debug locations + if !isAlreadyExpandedAggregateType(v.Type) { + for _, l := range locs { + f.NamedValues[l] = append(f.NamedValues[l], v) + } + f.Names = append(f.Names, locs...) + continue + } + // Not-leaf types that had debug locations need to lose them. + if ns, ok := namedSelects[v]; ok { + toDelete = append(toDelete, ns...) 
+ } + } + + deleteNamedVals(f, toDelete) + // Step 4: rewrite the calls themselves, correcting the type for _, b := range f.Blocks { for _, v := range b.Values { diff --git a/src/cmd/compile/internal/ssa/export_test.go b/src/cmd/compile/internal/ssa/export_test.go index 51665c60e2..b4c3e5cfdf 100644 --- a/src/cmd/compile/internal/ssa/export_test.go +++ b/src/cmd/compile/internal/ssa/export_test.go @@ -125,6 +125,10 @@ func (d DummyFrontend) SplitStruct(s LocalSlot, i int) LocalSlot { func (d DummyFrontend) SplitArray(s LocalSlot) LocalSlot { return LocalSlot{N: s.N, Type: s.Type.Elem(), Off: s.Off} } + +func (d DummyFrontend) SplitSlot(parent *LocalSlot, suffix string, offset int64, t *types.Type) LocalSlot { + return LocalSlot{N: parent.N, Type: t, Off: offset} +} func (DummyFrontend) Line(_ src.XPos) string { return "unknown.go:0" } diff --git a/src/cmd/compile/internal/ssa/gen/dec64.rules b/src/cmd/compile/internal/ssa/gen/dec64.rules index 4f9e863f90..07607960fa 100644 --- a/src/cmd/compile/internal/ssa/gen/dec64.rules +++ b/src/cmd/compile/internal/ssa/gen/dec64.rules @@ -9,7 +9,6 @@ (Int64Hi (Int64Make hi _)) => hi (Int64Lo (Int64Make _ lo)) => lo - (Load ptr mem) && is64BitInt(t) && !config.BigEndian && t.IsSigned() => (Int64Make (Load (OffPtr [4] ptr) mem) @@ -143,6 +142,10 @@ (Trunc64to32 (Int64Make _ lo)) => lo (Trunc64to16 (Int64Make _ lo)) => (Trunc32to16 lo) (Trunc64to8 (Int64Make _ lo)) => (Trunc32to8 lo) +// Most general +(Trunc64to32 x) => (Int64Lo x) +(Trunc64to16 x) => (Trunc32to16 (Int64Lo x)) +(Trunc64to8 x) => (Trunc32to8 (Int64Lo x)) (Lsh32x64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const32 [0]) (Rsh32x64 x (Int64Make (Const32 [c]) _)) && c != 0 => (Signmask x) @@ -175,156 +178,174 @@ // turn x64 non-constant shifts to x32 shifts // if high 32-bit of the shift is nonzero, make a huge shift (Lsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 => - (Lsh64x32 x (Or32 (Zeromask hi) lo)) + (Lsh64x32 x (Or32 (Zeromask hi) lo)) (Rsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 => - (Rsh64x32 x (Or32 (Zeromask hi) lo)) + (Rsh64x32 x (Or32 (Zeromask hi) lo)) (Rsh64Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 => - (Rsh64Ux32 x (Or32 (Zeromask hi) lo)) + (Rsh64Ux32 x (Or32 (Zeromask hi) lo)) (Lsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 => - (Lsh32x32 x (Or32 (Zeromask hi) lo)) + (Lsh32x32 x (Or32 (Zeromask hi) lo)) (Rsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 => - (Rsh32x32 x (Or32 (Zeromask hi) lo)) + (Rsh32x32 x (Or32 (Zeromask hi) lo)) (Rsh32Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 => - (Rsh32Ux32 x (Or32 (Zeromask hi) lo)) + (Rsh32Ux32 x (Or32 (Zeromask hi) lo)) (Lsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 => - (Lsh16x32 x (Or32 (Zeromask hi) lo)) + (Lsh16x32 x (Or32 (Zeromask hi) lo)) (Rsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 => - (Rsh16x32 x (Or32 (Zeromask hi) lo)) + (Rsh16x32 x (Or32 (Zeromask hi) lo)) (Rsh16Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 => - (Rsh16Ux32 x (Or32 (Zeromask hi) lo)) + (Rsh16Ux32 x (Or32 (Zeromask hi) lo)) (Lsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 => - (Lsh8x32 x (Or32 (Zeromask hi) lo)) + (Lsh8x32 x (Or32 (Zeromask hi) lo)) (Rsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 => - (Rsh8x32 x (Or32 (Zeromask hi) lo)) + (Rsh8x32 x (Or32 (Zeromask hi) lo)) (Rsh8Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 => - (Rsh8Ux32 x (Or32 (Zeromask hi) lo)) + (Rsh8Ux32 x (Or32 (Zeromask hi) lo)) + +// Most general +(Lsh64x64 x y) => (Lsh64x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) 
+(Rsh64x64 x y) => (Rsh64x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) +(Rsh64Ux64 x y) => (Rsh64Ux32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) +(Lsh32x64 x y) => (Lsh32x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) +(Rsh32x64 x y) => (Rsh32x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) +(Rsh32Ux64 x y) => (Rsh32Ux32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) +(Lsh16x64 x y) => (Lsh16x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) +(Rsh16x64 x y) => (Rsh16x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) +(Rsh16Ux64 x y) => (Rsh16Ux32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) +(Lsh8x64 x y) => (Lsh8x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) +(Rsh8x64 x y) => (Rsh8x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) +(Rsh8Ux64 x y) => (Rsh8Ux32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) + +// Clean up constants a little +(Or32 (Zeromask (Const32 [c])) y) && c == 0 => y +(Or32 (Zeromask (Const32 [c])) y) && c != 0 => (Const32 [-1]) // 64x left shift // result.hi = hi<>(32-s) | lo<<(s-32) // >> is unsigned, large shifts result 0 // result.lo = lo< +(Lsh64x32 x s) => (Int64Make (Or32 (Or32 - (Lsh32x32 hi s) + (Lsh32x32 (Int64Hi x) s) (Rsh32Ux32 - lo + (Int64Lo x) (Sub32 (Const32 [32]) s))) (Lsh32x32 - lo + (Int64Lo x) (Sub32 s (Const32 [32])))) - (Lsh32x32 lo s)) -(Lsh64x16 (Int64Make hi lo) s) => + (Lsh32x32 (Int64Lo x) s)) +(Lsh64x16 x s) => (Int64Make (Or32 (Or32 - (Lsh32x16 hi s) + (Lsh32x16 (Int64Hi x) s) (Rsh32Ux16 - lo + (Int64Lo x) (Sub16 (Const16 [32]) s))) (Lsh32x16 - lo + (Int64Lo x) (Sub16 s (Const16 [32])))) - (Lsh32x16 lo s)) -(Lsh64x8 (Int64Make hi lo) s) => + (Lsh32x16 (Int64Lo x) s)) +(Lsh64x8 x s) => (Int64Make (Or32 (Or32 - (Lsh32x8 hi s) + (Lsh32x8 (Int64Hi x) s) (Rsh32Ux8 - lo + (Int64Lo x) (Sub8 (Const8 [32]) s))) (Lsh32x8 - lo + (Int64Lo x) (Sub8 s (Const8 [32])))) - (Lsh32x8 lo s)) + (Lsh32x8 (Int64Lo x) s)) // 64x unsigned right shift // result.hi = hi>>s // result.lo = lo>>s | hi<<(32-s) | hi>>(s-32) // >> is unsigned, large shifts result 0 -(Rsh64Ux32 (Int64Make hi lo) s) => +(Rsh64Ux32 x s) => (Int64Make - (Rsh32Ux32 hi s) + (Rsh32Ux32 (Int64Hi x) s) (Or32 (Or32 - (Rsh32Ux32 lo s) + (Rsh32Ux32 (Int64Lo x) s) (Lsh32x32 - hi + (Int64Hi x) (Sub32 (Const32 [32]) s))) (Rsh32Ux32 - hi + (Int64Hi x) (Sub32 s (Const32 [32]))))) -(Rsh64Ux16 (Int64Make hi lo) s) => +(Rsh64Ux16 x s) => (Int64Make - (Rsh32Ux16 hi s) + (Rsh32Ux16 (Int64Hi x) s) (Or32 (Or32 - (Rsh32Ux16 lo s) + (Rsh32Ux16 (Int64Lo x) s) (Lsh32x16 - hi + (Int64Hi x) (Sub16 (Const16 [32]) s))) (Rsh32Ux16 - hi + (Int64Hi x) (Sub16 s (Const16 [32]))))) -(Rsh64Ux8 (Int64Make hi lo) s) => +(Rsh64Ux8 x s) => (Int64Make - (Rsh32Ux8 hi s) + (Rsh32Ux8 (Int64Hi x) s) (Or32 (Or32 - (Rsh32Ux8 lo s) + (Rsh32Ux8 (Int64Lo x) s) (Lsh32x8 - hi + (Int64Hi x) (Sub8 (Const8 [32]) s))) (Rsh32Ux8 - hi + (Int64Hi x) (Sub8 s (Const8 [32]))))) // 64x signed right shift // result.hi = hi>>s // result.lo = lo>>s | hi<<(32-s) | (hi>>(s-32))&zeromask(s>>5) // hi>>(s-32) is signed, large shifts result 0/-1 -(Rsh64x32 (Int64Make hi lo) s) => +(Rsh64x32 x s) => (Int64Make - (Rsh32x32 hi s) + (Rsh32x32 (Int64Hi x) s) (Or32 (Or32 - (Rsh32Ux32 lo s) + (Rsh32Ux32 (Int64Lo x) s) (Lsh32x32 - hi + (Int64Hi x) (Sub32 (Const32 [32]) s))) (And32 (Rsh32x32 - hi + (Int64Hi x) (Sub32 s (Const32 [32]))) (Zeromask (Rsh32Ux32 s (Const32 [5])))))) -(Rsh64x16 (Int64Make hi lo) s) => +(Rsh64x16 x s) => (Int64Make - (Rsh32x16 hi s) + (Rsh32x16 (Int64Hi x) s) (Or32 (Or32 - (Rsh32Ux16 lo s) + (Rsh32Ux16 (Int64Lo x) s) (Lsh32x16 - hi + (Int64Hi x) 
(Sub16 (Const16 [32]) s))) (And32 (Rsh32x16 - hi + (Int64Hi x) (Sub16 s (Const16 [32]))) (Zeromask (ZeroExt16to32 (Rsh16Ux32 s (Const32 [5]))))))) -(Rsh64x8 (Int64Make hi lo) s) => +(Rsh64x8 x s) => (Int64Make - (Rsh32x8 hi s) + (Rsh32x8 (Int64Hi x) s) (Or32 (Or32 - (Rsh32Ux8 lo s) + (Rsh32Ux8 (Int64Lo x) s) (Lsh32x8 - hi + (Int64Hi x) (Sub8 (Const8 [32]) s))) (And32 (Rsh32x8 - hi + (Int64Hi x) (Sub8 s (Const8 [32]))) (Zeromask (ZeroExt8to32 diff --git a/src/cmd/compile/internal/ssa/rewritedec64.go b/src/cmd/compile/internal/ssa/rewritedec64.go index 86fbc9901a..8b9753414f 100644 --- a/src/cmd/compile/internal/ssa/rewritedec64.go +++ b/src/cmd/compile/internal/ssa/rewritedec64.go @@ -62,6 +62,8 @@ func rewriteValuedec64(v *Value) bool { return rewriteValuedec64_OpNeg64(v) case OpNeq64: return rewriteValuedec64_OpNeq64(v) + case OpOr32: + return rewriteValuedec64_OpOr32(v) case OpOr64: return rewriteValuedec64_OpOr64(v) case OpRsh16Ux64: @@ -728,7 +730,23 @@ func rewriteValuedec64_OpLsh16x64(v *Value) bool { v.AddArg2(x, v0) return true } - return false + // match: (Lsh16x64 x y) + // result: (Lsh16x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) + for { + x := v_0 + y := v_1 + v.reset(OpLsh16x32) + v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32) + v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v2.AddArg(y) + v1.AddArg(v2) + v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v3.AddArg(y) + v0.AddArg2(v1, v3) + v.AddArg2(x, v0) + return true + } } func rewriteValuedec64_OpLsh32x64(v *Value) bool { v_1 := v.Args[1] @@ -793,83 +811,97 @@ func rewriteValuedec64_OpLsh32x64(v *Value) bool { v.AddArg2(x, v0) return true } - return false + // match: (Lsh32x64 x y) + // result: (Lsh32x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) + for { + x := v_0 + y := v_1 + v.reset(OpLsh32x32) + v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32) + v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v2.AddArg(y) + v1.AddArg(v2) + v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v3.AddArg(y) + v0.AddArg2(v1, v3) + v.AddArg2(x, v0) + return true + } } func rewriteValuedec64_OpLsh64x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Lsh64x16 (Int64Make hi lo) s) - // result: (Int64Make (Or32 (Or32 (Lsh32x16 hi s) (Rsh32Ux16 lo (Sub16 (Const16 [32]) s))) (Lsh32x16 lo (Sub16 s (Const16 [32])))) (Lsh32x16 lo s)) + // match: (Lsh64x16 x s) + // result: (Int64Make (Or32 (Or32 (Lsh32x16 (Int64Hi x) s) (Rsh32Ux16 (Int64Lo x) (Sub16 (Const16 [32]) s))) (Lsh32x16 (Int64Lo x) (Sub16 s (Const16 [32])))) (Lsh32x16 (Int64Lo x) s)) for { - if v_0.Op != OpInt64Make { - break - } - lo := v_0.Args[1] - hi := v_0.Args[0] + x := v_0 s := v_1 v.reset(OpInt64Make) v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) v2 := b.NewValue0(v.Pos, OpLsh32x16, typ.UInt32) - v2.AddArg2(hi, s) - v3 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32) - v4 := b.NewValue0(v.Pos, OpSub16, typ.UInt16) - v5 := b.NewValue0(v.Pos, OpConst16, typ.UInt16) - v5.AuxInt = int16ToAuxInt(32) - v4.AddArg2(v5, s) - v3.AddArg2(lo, v4) - v1.AddArg2(v2, v3) - v6 := b.NewValue0(v.Pos, OpLsh32x16, typ.UInt32) - v7 := b.NewValue0(v.Pos, OpSub16, typ.UInt16) - v7.AddArg2(s, v5) - v6.AddArg2(lo, v7) - v0.AddArg2(v1, v6) + v3 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v3.AddArg(x) + v2.AddArg2(v3, s) + v4 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32) + v5 := b.NewValue0(v.Pos, 
OpInt64Lo, typ.UInt32) + v5.AddArg(x) + v6 := b.NewValue0(v.Pos, OpSub16, typ.UInt16) + v7 := b.NewValue0(v.Pos, OpConst16, typ.UInt16) + v7.AuxInt = int16ToAuxInt(32) + v6.AddArg2(v7, s) + v4.AddArg2(v5, v6) + v1.AddArg2(v2, v4) v8 := b.NewValue0(v.Pos, OpLsh32x16, typ.UInt32) - v8.AddArg2(lo, s) - v.AddArg2(v0, v8) + v9 := b.NewValue0(v.Pos, OpSub16, typ.UInt16) + v9.AddArg2(s, v7) + v8.AddArg2(v5, v9) + v0.AddArg2(v1, v8) + v10 := b.NewValue0(v.Pos, OpLsh32x16, typ.UInt32) + v10.AddArg2(v5, s) + v.AddArg2(v0, v10) return true } - return false } func rewriteValuedec64_OpLsh64x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Lsh64x32 (Int64Make hi lo) s) - // result: (Int64Make (Or32 (Or32 (Lsh32x32 hi s) (Rsh32Ux32 lo (Sub32 (Const32 [32]) s))) (Lsh32x32 lo (Sub32 s (Const32 [32])))) (Lsh32x32 lo s)) + // match: (Lsh64x32 x s) + // result: (Int64Make (Or32 (Or32 (Lsh32x32 (Int64Hi x) s) (Rsh32Ux32 (Int64Lo x) (Sub32 (Const32 [32]) s))) (Lsh32x32 (Int64Lo x) (Sub32 s (Const32 [32])))) (Lsh32x32 (Int64Lo x) s)) for { - if v_0.Op != OpInt64Make { - break - } - lo := v_0.Args[1] - hi := v_0.Args[0] + x := v_0 s := v_1 v.reset(OpInt64Make) v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) v2 := b.NewValue0(v.Pos, OpLsh32x32, typ.UInt32) - v2.AddArg2(hi, s) - v3 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32) - v4 := b.NewValue0(v.Pos, OpSub32, typ.UInt32) - v5 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) - v5.AuxInt = int32ToAuxInt(32) - v4.AddArg2(v5, s) - v3.AddArg2(lo, v4) - v1.AddArg2(v2, v3) - v6 := b.NewValue0(v.Pos, OpLsh32x32, typ.UInt32) - v7 := b.NewValue0(v.Pos, OpSub32, typ.UInt32) - v7.AddArg2(s, v5) - v6.AddArg2(lo, v7) - v0.AddArg2(v1, v6) + v3 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v3.AddArg(x) + v2.AddArg2(v3, s) + v4 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32) + v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v5.AddArg(x) + v6 := b.NewValue0(v.Pos, OpSub32, typ.UInt32) + v7 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) + v7.AuxInt = int32ToAuxInt(32) + v6.AddArg2(v7, s) + v4.AddArg2(v5, v6) + v1.AddArg2(v2, v4) v8 := b.NewValue0(v.Pos, OpLsh32x32, typ.UInt32) - v8.AddArg2(lo, s) - v.AddArg2(v0, v8) + v9 := b.NewValue0(v.Pos, OpSub32, typ.UInt32) + v9.AddArg2(s, v7) + v8.AddArg2(v5, v9) + v0.AddArg2(v1, v8) + v10 := b.NewValue0(v.Pos, OpLsh32x32, typ.UInt32) + v10.AddArg2(v5, s) + v.AddArg2(v0, v10) return true } - return false } func rewriteValuedec64_OpLsh64x64(v *Value) bool { v_1 := v.Args[1] @@ -934,45 +966,60 @@ func rewriteValuedec64_OpLsh64x64(v *Value) bool { v.AddArg2(x, v0) return true } - return false + // match: (Lsh64x64 x y) + // result: (Lsh64x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) + for { + x := v_0 + y := v_1 + v.reset(OpLsh64x32) + v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32) + v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v2.AddArg(y) + v1.AddArg(v2) + v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v3.AddArg(y) + v0.AddArg2(v1, v3) + v.AddArg2(x, v0) + return true + } } func rewriteValuedec64_OpLsh64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Lsh64x8 (Int64Make hi lo) s) - // result: (Int64Make (Or32 (Or32 (Lsh32x8 hi s) (Rsh32Ux8 lo (Sub8 (Const8 [32]) s))) (Lsh32x8 lo (Sub8 s (Const8 [32])))) (Lsh32x8 lo s)) + // match: (Lsh64x8 x s) + // result: (Int64Make (Or32 (Or32 (Lsh32x8 (Int64Hi x) s) (Rsh32Ux8 (Int64Lo x) 
(Sub8 (Const8 [32]) s))) (Lsh32x8 (Int64Lo x) (Sub8 s (Const8 [32])))) (Lsh32x8 (Int64Lo x) s)) for { - if v_0.Op != OpInt64Make { - break - } - lo := v_0.Args[1] - hi := v_0.Args[0] + x := v_0 s := v_1 v.reset(OpInt64Make) v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) v2 := b.NewValue0(v.Pos, OpLsh32x8, typ.UInt32) - v2.AddArg2(hi, s) - v3 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32) - v4 := b.NewValue0(v.Pos, OpSub8, typ.UInt8) - v5 := b.NewValue0(v.Pos, OpConst8, typ.UInt8) - v5.AuxInt = int8ToAuxInt(32) - v4.AddArg2(v5, s) - v3.AddArg2(lo, v4) - v1.AddArg2(v2, v3) - v6 := b.NewValue0(v.Pos, OpLsh32x8, typ.UInt32) - v7 := b.NewValue0(v.Pos, OpSub8, typ.UInt8) - v7.AddArg2(s, v5) - v6.AddArg2(lo, v7) - v0.AddArg2(v1, v6) + v3 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v3.AddArg(x) + v2.AddArg2(v3, s) + v4 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32) + v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v5.AddArg(x) + v6 := b.NewValue0(v.Pos, OpSub8, typ.UInt8) + v7 := b.NewValue0(v.Pos, OpConst8, typ.UInt8) + v7.AuxInt = int8ToAuxInt(32) + v6.AddArg2(v7, s) + v4.AddArg2(v5, v6) + v1.AddArg2(v2, v4) v8 := b.NewValue0(v.Pos, OpLsh32x8, typ.UInt32) - v8.AddArg2(lo, s) - v.AddArg2(v0, v8) + v9 := b.NewValue0(v.Pos, OpSub8, typ.UInt8) + v9.AddArg2(s, v7) + v8.AddArg2(v5, v9) + v0.AddArg2(v1, v8) + v10 := b.NewValue0(v.Pos, OpLsh32x8, typ.UInt32) + v10.AddArg2(v5, s) + v.AddArg2(v0, v10) return true } - return false } func rewriteValuedec64_OpLsh8x64(v *Value) bool { v_1 := v.Args[1] @@ -1037,7 +1084,23 @@ func rewriteValuedec64_OpLsh8x64(v *Value) bool { v.AddArg2(x, v0) return true } - return false + // match: (Lsh8x64 x y) + // result: (Lsh8x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) + for { + x := v_0 + y := v_1 + v.reset(OpLsh8x32) + v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32) + v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v2.AddArg(y) + v1.AddArg(v2) + v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v3.AddArg(y) + v0.AddArg2(v1, v3) + v.AddArg2(x, v0) + return true + } } func rewriteValuedec64_OpMul64(v *Value) bool { v_1 := v.Args[1] @@ -1118,6 +1181,64 @@ func rewriteValuedec64_OpNeq64(v *Value) bool { return true } } +func rewriteValuedec64_OpOr32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Or32 (Zeromask (Const32 [c])) y) + // cond: c == 0 + // result: y + for { + if v.Type != typ.UInt32 { + break + } + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpZeromask { + continue + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpConst32 { + continue + } + c := auxIntToInt32(v_0_0.AuxInt) + y := v_1 + if !(c == 0) { + continue + } + v.copyOf(y) + return true + } + break + } + // match: (Or32 (Zeromask (Const32 [c])) y) + // cond: c != 0 + // result: (Const32 [-1]) + for { + if v.Type != typ.UInt32 { + break + } + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpZeromask { + continue + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpConst32 { + continue + } + c := auxIntToInt32(v_0_0.AuxInt) + if !(c != 0) { + continue + } + v.reset(OpConst32) + v.Type = typ.UInt32 + v.AuxInt = int32ToAuxInt(-1) + return true + } + break + } + return false +} func rewriteValuedec64_OpOr64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -1208,7 +1329,23 @@ func rewriteValuedec64_OpRsh16Ux64(v *Value) bool { v.AddArg2(x, v0) return true } - return false + // match: (Rsh16Ux64 x y) + 
// result: (Rsh16Ux32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) + for { + x := v_0 + y := v_1 + v.reset(OpRsh16Ux32) + v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32) + v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v2.AddArg(y) + v1.AddArg(v2) + v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v3.AddArg(y) + v0.AddArg2(v1, v3) + v.AddArg2(x, v0) + return true + } } func rewriteValuedec64_OpRsh16x64(v *Value) bool { v_1 := v.Args[1] @@ -1276,7 +1413,23 @@ func rewriteValuedec64_OpRsh16x64(v *Value) bool { v.AddArg2(x, v0) return true } - return false + // match: (Rsh16x64 x y) + // result: (Rsh16x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) + for { + x := v_0 + y := v_1 + v.reset(OpRsh16x32) + v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32) + v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v2.AddArg(y) + v1.AddArg(v2) + v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v3.AddArg(y) + v0.AddArg2(v1, v3) + v.AddArg2(x, v0) + return true + } } func rewriteValuedec64_OpRsh32Ux64(v *Value) bool { v_1 := v.Args[1] @@ -1341,7 +1494,23 @@ func rewriteValuedec64_OpRsh32Ux64(v *Value) bool { v.AddArg2(x, v0) return true } - return false + // match: (Rsh32Ux64 x y) + // result: (Rsh32Ux32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) + for { + x := v_0 + y := v_1 + v.reset(OpRsh32Ux32) + v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32) + v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v2.AddArg(y) + v1.AddArg(v2) + v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v3.AddArg(y) + v0.AddArg2(v1, v3) + v.AddArg2(x, v0) + return true + } } func rewriteValuedec64_OpRsh32x64(v *Value) bool { v_1 := v.Args[1] @@ -1407,83 +1576,97 @@ func rewriteValuedec64_OpRsh32x64(v *Value) bool { v.AddArg2(x, v0) return true } - return false + // match: (Rsh32x64 x y) + // result: (Rsh32x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) + for { + x := v_0 + y := v_1 + v.reset(OpRsh32x32) + v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32) + v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v2.AddArg(y) + v1.AddArg(v2) + v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v3.AddArg(y) + v0.AddArg2(v1, v3) + v.AddArg2(x, v0) + return true + } } func rewriteValuedec64_OpRsh64Ux16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Rsh64Ux16 (Int64Make hi lo) s) - // result: (Int64Make (Rsh32Ux16 hi s) (Or32 (Or32 (Rsh32Ux16 lo s) (Lsh32x16 hi (Sub16 (Const16 [32]) s))) (Rsh32Ux16 hi (Sub16 s (Const16 [32]))))) + // match: (Rsh64Ux16 x s) + // result: (Int64Make (Rsh32Ux16 (Int64Hi x) s) (Or32 (Or32 (Rsh32Ux16 (Int64Lo x) s) (Lsh32x16 (Int64Hi x) (Sub16 (Const16 [32]) s))) (Rsh32Ux16 (Int64Hi x) (Sub16 s (Const16 [32]))))) for { - if v_0.Op != OpInt64Make { - break - } - lo := v_0.Args[1] - hi := v_0.Args[0] + x := v_0 s := v_1 v.reset(OpInt64Make) v0 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32) - v0.AddArg2(hi, s) - v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v1.AddArg(x) + v0.AddArg2(v1, s) v2 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) - v3 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32) - v3.AddArg2(lo, s) - v4 := b.NewValue0(v.Pos, OpLsh32x16, typ.UInt32) - v5 := b.NewValue0(v.Pos, OpSub16, typ.UInt16) - v6 := b.NewValue0(v.Pos, OpConst16, typ.UInt16) - v6.AuxInt = int16ToAuxInt(32) - v5.AddArg2(v6, s) - v4.AddArg2(hi, v5) 
- v2.AddArg2(v3, v4) - v7 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32) - v8 := b.NewValue0(v.Pos, OpSub16, typ.UInt16) - v8.AddArg2(s, v6) - v7.AddArg2(hi, v8) - v1.AddArg2(v2, v7) - v.AddArg2(v0, v1) + v3 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v4 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32) + v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v5.AddArg(x) + v4.AddArg2(v5, s) + v6 := b.NewValue0(v.Pos, OpLsh32x16, typ.UInt32) + v7 := b.NewValue0(v.Pos, OpSub16, typ.UInt16) + v8 := b.NewValue0(v.Pos, OpConst16, typ.UInt16) + v8.AuxInt = int16ToAuxInt(32) + v7.AddArg2(v8, s) + v6.AddArg2(v1, v7) + v3.AddArg2(v4, v6) + v9 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32) + v10 := b.NewValue0(v.Pos, OpSub16, typ.UInt16) + v10.AddArg2(s, v8) + v9.AddArg2(v1, v10) + v2.AddArg2(v3, v9) + v.AddArg2(v0, v2) return true } - return false } func rewriteValuedec64_OpRsh64Ux32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Rsh64Ux32 (Int64Make hi lo) s) - // result: (Int64Make (Rsh32Ux32 hi s) (Or32 (Or32 (Rsh32Ux32 lo s) (Lsh32x32 hi (Sub32 (Const32 [32]) s))) (Rsh32Ux32 hi (Sub32 s (Const32 [32]))))) + // match: (Rsh64Ux32 x s) + // result: (Int64Make (Rsh32Ux32 (Int64Hi x) s) (Or32 (Or32 (Rsh32Ux32 (Int64Lo x) s) (Lsh32x32 (Int64Hi x) (Sub32 (Const32 [32]) s))) (Rsh32Ux32 (Int64Hi x) (Sub32 s (Const32 [32]))))) for { - if v_0.Op != OpInt64Make { - break - } - lo := v_0.Args[1] - hi := v_0.Args[0] + x := v_0 s := v_1 v.reset(OpInt64Make) v0 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32) - v0.AddArg2(hi, s) - v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v1.AddArg(x) + v0.AddArg2(v1, s) v2 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) - v3 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32) - v3.AddArg2(lo, s) - v4 := b.NewValue0(v.Pos, OpLsh32x32, typ.UInt32) - v5 := b.NewValue0(v.Pos, OpSub32, typ.UInt32) - v6 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) - v6.AuxInt = int32ToAuxInt(32) - v5.AddArg2(v6, s) - v4.AddArg2(hi, v5) - v2.AddArg2(v3, v4) - v7 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32) - v8 := b.NewValue0(v.Pos, OpSub32, typ.UInt32) - v8.AddArg2(s, v6) - v7.AddArg2(hi, v8) - v1.AddArg2(v2, v7) - v.AddArg2(v0, v1) + v3 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v4 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32) + v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v5.AddArg(x) + v4.AddArg2(v5, s) + v6 := b.NewValue0(v.Pos, OpLsh32x32, typ.UInt32) + v7 := b.NewValue0(v.Pos, OpSub32, typ.UInt32) + v8 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) + v8.AuxInt = int32ToAuxInt(32) + v7.AddArg2(v8, s) + v6.AddArg2(v1, v7) + v3.AddArg2(v4, v6) + v9 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32) + v10 := b.NewValue0(v.Pos, OpSub32, typ.UInt32) + v10.AddArg2(s, v8) + v9.AddArg2(v1, v10) + v2.AddArg2(v3, v9) + v.AddArg2(v0, v2) return true } - return false } func rewriteValuedec64_OpRsh64Ux64(v *Value) bool { v_1 := v.Args[1] @@ -1548,139 +1731,152 @@ func rewriteValuedec64_OpRsh64Ux64(v *Value) bool { v.AddArg2(x, v0) return true } - return false + // match: (Rsh64Ux64 x y) + // result: (Rsh64Ux32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) + for { + x := v_0 + y := v_1 + v.reset(OpRsh64Ux32) + v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32) + v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v2.AddArg(y) + v1.AddArg(v2) + v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v3.AddArg(y) + v0.AddArg2(v1, v3) + v.AddArg2(x, v0) + return true + } } func 
rewriteValuedec64_OpRsh64Ux8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Rsh64Ux8 (Int64Make hi lo) s) - // result: (Int64Make (Rsh32Ux8 hi s) (Or32 (Or32 (Rsh32Ux8 lo s) (Lsh32x8 hi (Sub8 (Const8 [32]) s))) (Rsh32Ux8 hi (Sub8 s (Const8 [32]))))) + // match: (Rsh64Ux8 x s) + // result: (Int64Make (Rsh32Ux8 (Int64Hi x) s) (Or32 (Or32 (Rsh32Ux8 (Int64Lo x) s) (Lsh32x8 (Int64Hi x) (Sub8 (Const8 [32]) s))) (Rsh32Ux8 (Int64Hi x) (Sub8 s (Const8 [32]))))) for { - if v_0.Op != OpInt64Make { - break - } - lo := v_0.Args[1] - hi := v_0.Args[0] + x := v_0 s := v_1 v.reset(OpInt64Make) v0 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32) - v0.AddArg2(hi, s) - v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v1.AddArg(x) + v0.AddArg2(v1, s) v2 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) - v3 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32) - v3.AddArg2(lo, s) - v4 := b.NewValue0(v.Pos, OpLsh32x8, typ.UInt32) - v5 := b.NewValue0(v.Pos, OpSub8, typ.UInt8) - v6 := b.NewValue0(v.Pos, OpConst8, typ.UInt8) - v6.AuxInt = int8ToAuxInt(32) - v5.AddArg2(v6, s) - v4.AddArg2(hi, v5) - v2.AddArg2(v3, v4) - v7 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32) - v8 := b.NewValue0(v.Pos, OpSub8, typ.UInt8) - v8.AddArg2(s, v6) - v7.AddArg2(hi, v8) - v1.AddArg2(v2, v7) - v.AddArg2(v0, v1) + v3 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v4 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32) + v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v5.AddArg(x) + v4.AddArg2(v5, s) + v6 := b.NewValue0(v.Pos, OpLsh32x8, typ.UInt32) + v7 := b.NewValue0(v.Pos, OpSub8, typ.UInt8) + v8 := b.NewValue0(v.Pos, OpConst8, typ.UInt8) + v8.AuxInt = int8ToAuxInt(32) + v7.AddArg2(v8, s) + v6.AddArg2(v1, v7) + v3.AddArg2(v4, v6) + v9 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32) + v10 := b.NewValue0(v.Pos, OpSub8, typ.UInt8) + v10.AddArg2(s, v8) + v9.AddArg2(v1, v10) + v2.AddArg2(v3, v9) + v.AddArg2(v0, v2) return true } - return false } func rewriteValuedec64_OpRsh64x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Rsh64x16 (Int64Make hi lo) s) - // result: (Int64Make (Rsh32x16 hi s) (Or32 (Or32 (Rsh32Ux16 lo s) (Lsh32x16 hi (Sub16 (Const16 [32]) s))) (And32 (Rsh32x16 hi (Sub16 s (Const16 [32]))) (Zeromask (ZeroExt16to32 (Rsh16Ux32 s (Const32 [5]))))))) + // match: (Rsh64x16 x s) + // result: (Int64Make (Rsh32x16 (Int64Hi x) s) (Or32 (Or32 (Rsh32Ux16 (Int64Lo x) s) (Lsh32x16 (Int64Hi x) (Sub16 (Const16 [32]) s))) (And32 (Rsh32x16 (Int64Hi x) (Sub16 s (Const16 [32]))) (Zeromask (ZeroExt16to32 (Rsh16Ux32 s (Const32 [5]))))))) for { - if v_0.Op != OpInt64Make { - break - } - lo := v_0.Args[1] - hi := v_0.Args[0] + x := v_0 s := v_1 v.reset(OpInt64Make) v0 := b.NewValue0(v.Pos, OpRsh32x16, typ.UInt32) - v0.AddArg2(hi, s) - v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v1.AddArg(x) + v0.AddArg2(v1, s) v2 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) - v3 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32) - v3.AddArg2(lo, s) - v4 := b.NewValue0(v.Pos, OpLsh32x16, typ.UInt32) - v5 := b.NewValue0(v.Pos, OpSub16, typ.UInt16) - v6 := b.NewValue0(v.Pos, OpConst16, typ.UInt16) - v6.AuxInt = int16ToAuxInt(32) - v5.AddArg2(v6, s) - v4.AddArg2(hi, v5) - v2.AddArg2(v3, v4) - v7 := b.NewValue0(v.Pos, OpAnd32, typ.UInt32) - v8 := b.NewValue0(v.Pos, OpRsh32x16, typ.UInt32) - v9 := b.NewValue0(v.Pos, OpSub16, typ.UInt16) - v9.AddArg2(s, v6) - v8.AddArg2(hi, v9) - v10 := 
b.NewValue0(v.Pos, OpZeromask, typ.UInt32) - v11 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32) - v12 := b.NewValue0(v.Pos, OpRsh16Ux32, typ.UInt16) - v13 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) - v13.AuxInt = int32ToAuxInt(5) - v12.AddArg2(s, v13) - v11.AddArg(v12) - v10.AddArg(v11) - v7.AddArg2(v8, v10) - v1.AddArg2(v2, v7) - v.AddArg2(v0, v1) + v3 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v4 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32) + v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v5.AddArg(x) + v4.AddArg2(v5, s) + v6 := b.NewValue0(v.Pos, OpLsh32x16, typ.UInt32) + v7 := b.NewValue0(v.Pos, OpSub16, typ.UInt16) + v8 := b.NewValue0(v.Pos, OpConst16, typ.UInt16) + v8.AuxInt = int16ToAuxInt(32) + v7.AddArg2(v8, s) + v6.AddArg2(v1, v7) + v3.AddArg2(v4, v6) + v9 := b.NewValue0(v.Pos, OpAnd32, typ.UInt32) + v10 := b.NewValue0(v.Pos, OpRsh32x16, typ.UInt32) + v11 := b.NewValue0(v.Pos, OpSub16, typ.UInt16) + v11.AddArg2(s, v8) + v10.AddArg2(v1, v11) + v12 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32) + v13 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32) + v14 := b.NewValue0(v.Pos, OpRsh16Ux32, typ.UInt16) + v15 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) + v15.AuxInt = int32ToAuxInt(5) + v14.AddArg2(s, v15) + v13.AddArg(v14) + v12.AddArg(v13) + v9.AddArg2(v10, v12) + v2.AddArg2(v3, v9) + v.AddArg2(v0, v2) return true } - return false } func rewriteValuedec64_OpRsh64x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Rsh64x32 (Int64Make hi lo) s) - // result: (Int64Make (Rsh32x32 hi s) (Or32 (Or32 (Rsh32Ux32 lo s) (Lsh32x32 hi (Sub32 (Const32 [32]) s))) (And32 (Rsh32x32 hi (Sub32 s (Const32 [32]))) (Zeromask (Rsh32Ux32 s (Const32 [5])))))) + // match: (Rsh64x32 x s) + // result: (Int64Make (Rsh32x32 (Int64Hi x) s) (Or32 (Or32 (Rsh32Ux32 (Int64Lo x) s) (Lsh32x32 (Int64Hi x) (Sub32 (Const32 [32]) s))) (And32 (Rsh32x32 (Int64Hi x) (Sub32 s (Const32 [32]))) (Zeromask (Rsh32Ux32 s (Const32 [5])))))) for { - if v_0.Op != OpInt64Make { - break - } - lo := v_0.Args[1] - hi := v_0.Args[0] + x := v_0 s := v_1 v.reset(OpInt64Make) v0 := b.NewValue0(v.Pos, OpRsh32x32, typ.UInt32) - v0.AddArg2(hi, s) - v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v1.AddArg(x) + v0.AddArg2(v1, s) v2 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) - v3 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32) - v3.AddArg2(lo, s) - v4 := b.NewValue0(v.Pos, OpLsh32x32, typ.UInt32) - v5 := b.NewValue0(v.Pos, OpSub32, typ.UInt32) - v6 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) - v6.AuxInt = int32ToAuxInt(32) - v5.AddArg2(v6, s) - v4.AddArg2(hi, v5) - v2.AddArg2(v3, v4) - v7 := b.NewValue0(v.Pos, OpAnd32, typ.UInt32) - v8 := b.NewValue0(v.Pos, OpRsh32x32, typ.UInt32) - v9 := b.NewValue0(v.Pos, OpSub32, typ.UInt32) - v9.AddArg2(s, v6) - v8.AddArg2(hi, v9) - v10 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32) - v11 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32) - v12 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) - v12.AuxInt = int32ToAuxInt(5) - v11.AddArg2(s, v12) - v10.AddArg(v11) - v7.AddArg2(v8, v10) - v1.AddArg2(v2, v7) - v.AddArg2(v0, v1) + v3 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v4 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32) + v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v5.AddArg(x) + v4.AddArg2(v5, s) + v6 := b.NewValue0(v.Pos, OpLsh32x32, typ.UInt32) + v7 := b.NewValue0(v.Pos, OpSub32, typ.UInt32) + v8 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) + v8.AuxInt = int32ToAuxInt(32) + v7.AddArg2(v8, s) 
+ v6.AddArg2(v1, v7) + v3.AddArg2(v4, v6) + v9 := b.NewValue0(v.Pos, OpAnd32, typ.UInt32) + v10 := b.NewValue0(v.Pos, OpRsh32x32, typ.UInt32) + v11 := b.NewValue0(v.Pos, OpSub32, typ.UInt32) + v11.AddArg2(s, v8) + v10.AddArg2(v1, v11) + v12 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32) + v13 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32) + v14 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) + v14.AuxInt = int32ToAuxInt(5) + v13.AddArg2(s, v14) + v12.AddArg(v13) + v9.AddArg2(v10, v12) + v2.AddArg2(v3, v9) + v.AddArg2(v0, v2) return true } - return false } func rewriteValuedec64_OpRsh64x64(v *Value) bool { v_1 := v.Args[1] @@ -1750,55 +1946,70 @@ func rewriteValuedec64_OpRsh64x64(v *Value) bool { v.AddArg2(x, v0) return true } - return false + // match: (Rsh64x64 x y) + // result: (Rsh64x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) + for { + x := v_0 + y := v_1 + v.reset(OpRsh64x32) + v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32) + v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v2.AddArg(y) + v1.AddArg(v2) + v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v3.AddArg(y) + v0.AddArg2(v1, v3) + v.AddArg2(x, v0) + return true + } } func rewriteValuedec64_OpRsh64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Rsh64x8 (Int64Make hi lo) s) - // result: (Int64Make (Rsh32x8 hi s) (Or32 (Or32 (Rsh32Ux8 lo s) (Lsh32x8 hi (Sub8 (Const8 [32]) s))) (And32 (Rsh32x8 hi (Sub8 s (Const8 [32]))) (Zeromask (ZeroExt8to32 (Rsh8Ux32 s (Const32 [5]))))))) + // match: (Rsh64x8 x s) + // result: (Int64Make (Rsh32x8 (Int64Hi x) s) (Or32 (Or32 (Rsh32Ux8 (Int64Lo x) s) (Lsh32x8 (Int64Hi x) (Sub8 (Const8 [32]) s))) (And32 (Rsh32x8 (Int64Hi x) (Sub8 s (Const8 [32]))) (Zeromask (ZeroExt8to32 (Rsh8Ux32 s (Const32 [5]))))))) for { - if v_0.Op != OpInt64Make { - break - } - lo := v_0.Args[1] - hi := v_0.Args[0] + x := v_0 s := v_1 v.reset(OpInt64Make) v0 := b.NewValue0(v.Pos, OpRsh32x8, typ.UInt32) - v0.AddArg2(hi, s) - v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v1.AddArg(x) + v0.AddArg2(v1, s) v2 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) - v3 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32) - v3.AddArg2(lo, s) - v4 := b.NewValue0(v.Pos, OpLsh32x8, typ.UInt32) - v5 := b.NewValue0(v.Pos, OpSub8, typ.UInt8) - v6 := b.NewValue0(v.Pos, OpConst8, typ.UInt8) - v6.AuxInt = int8ToAuxInt(32) - v5.AddArg2(v6, s) - v4.AddArg2(hi, v5) - v2.AddArg2(v3, v4) - v7 := b.NewValue0(v.Pos, OpAnd32, typ.UInt32) - v8 := b.NewValue0(v.Pos, OpRsh32x8, typ.UInt32) - v9 := b.NewValue0(v.Pos, OpSub8, typ.UInt8) - v9.AddArg2(s, v6) - v8.AddArg2(hi, v9) - v10 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32) - v11 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32) - v12 := b.NewValue0(v.Pos, OpRsh8Ux32, typ.UInt8) - v13 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) - v13.AuxInt = int32ToAuxInt(5) - v12.AddArg2(s, v13) - v11.AddArg(v12) - v10.AddArg(v11) - v7.AddArg2(v8, v10) - v1.AddArg2(v2, v7) - v.AddArg2(v0, v1) + v3 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v4 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32) + v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v5.AddArg(x) + v4.AddArg2(v5, s) + v6 := b.NewValue0(v.Pos, OpLsh32x8, typ.UInt32) + v7 := b.NewValue0(v.Pos, OpSub8, typ.UInt8) + v8 := b.NewValue0(v.Pos, OpConst8, typ.UInt8) + v8.AuxInt = int8ToAuxInt(32) + v7.AddArg2(v8, s) + v6.AddArg2(v1, v7) + v3.AddArg2(v4, v6) + v9 := b.NewValue0(v.Pos, OpAnd32, typ.UInt32) + v10 := 
b.NewValue0(v.Pos, OpRsh32x8, typ.UInt32) + v11 := b.NewValue0(v.Pos, OpSub8, typ.UInt8) + v11.AddArg2(s, v8) + v10.AddArg2(v1, v11) + v12 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32) + v13 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32) + v14 := b.NewValue0(v.Pos, OpRsh8Ux32, typ.UInt8) + v15 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) + v15.AuxInt = int32ToAuxInt(5) + v14.AddArg2(s, v15) + v13.AddArg(v14) + v12.AddArg(v13) + v9.AddArg2(v10, v12) + v2.AddArg2(v3, v9) + v.AddArg2(v0, v2) return true } - return false } func rewriteValuedec64_OpRsh8Ux64(v *Value) bool { v_1 := v.Args[1] @@ -1863,7 +2074,23 @@ func rewriteValuedec64_OpRsh8Ux64(v *Value) bool { v.AddArg2(x, v0) return true } - return false + // match: (Rsh8Ux64 x y) + // result: (Rsh8Ux32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) + for { + x := v_0 + y := v_1 + v.reset(OpRsh8Ux32) + v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32) + v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v2.AddArg(y) + v1.AddArg(v2) + v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v3.AddArg(y) + v0.AddArg2(v1, v3) + v.AddArg2(x, v0) + return true + } } func rewriteValuedec64_OpRsh8x64(v *Value) bool { v_1 := v.Args[1] @@ -1931,7 +2158,23 @@ func rewriteValuedec64_OpRsh8x64(v *Value) bool { v.AddArg2(x, v0) return true } - return false + // match: (Rsh8x64 x y) + // result: (Rsh8x32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) + for { + x := v_0 + y := v_1 + v.reset(OpRsh8x32) + v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32) + v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32) + v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v2.AddArg(y) + v1.AddArg(v2) + v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v3.AddArg(y) + v0.AddArg2(v1, v3) + v.AddArg2(x, v0) + return true + } } func rewriteValuedec64_OpSignExt16to64(v *Value) bool { v_0 := v.Args[0] @@ -2071,6 +2314,8 @@ func rewriteValuedec64_OpSub64(v *Value) bool { } func rewriteValuedec64_OpTrunc64to16(v *Value) bool { v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types // match: (Trunc64to16 (Int64Make _ lo)) // result: (Trunc32to16 lo) for { @@ -2082,7 +2327,16 @@ func rewriteValuedec64_OpTrunc64to16(v *Value) bool { v.AddArg(lo) return true } - return false + // match: (Trunc64to16 x) + // result: (Trunc32to16 (Int64Lo x)) + for { + x := v_0 + v.reset(OpTrunc32to16) + v0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v0.AddArg(x) + v.AddArg(v0) + return true + } } func rewriteValuedec64_OpTrunc64to32(v *Value) bool { v_0 := v.Args[0] @@ -2096,10 +2350,19 @@ func rewriteValuedec64_OpTrunc64to32(v *Value) bool { v.copyOf(lo) return true } - return false + // match: (Trunc64to32 x) + // result: (Int64Lo x) + for { + x := v_0 + v.reset(OpInt64Lo) + v.AddArg(x) + return true + } } func rewriteValuedec64_OpTrunc64to8(v *Value) bool { v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types // match: (Trunc64to8 (Int64Make _ lo)) // result: (Trunc32to8 lo) for { @@ -2111,7 +2374,16 @@ func rewriteValuedec64_OpTrunc64to8(v *Value) bool { v.AddArg(lo) return true } - return false + // match: (Trunc64to8 x) + // result: (Trunc32to8 (Int64Lo x)) + for { + x := v_0 + v.reset(OpTrunc32to8) + v0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + v0.AddArg(x) + v.AddArg(v0) + return true + } } func rewriteValuedec64_OpXor64(v *Value) bool { v_1 := v.Args[1] -- 2.48.1
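
Note for readers tracing the new dec64 rewrites: the following is a small illustrative Go sketch, not part of the patch. The helper names zeromask, shr32, shl32, and rsh64Ux64 are made up for this note. It mirrors the pair of rules (Rsh64Ux64 x y) => (Rsh64Ux32 x (Or32 (Zeromask (Int64Hi y)) (Int64Lo y))) together with the (Rsh64Ux32 x s) expansion above: the 64-bit shift count is first collapsed to 32 bits, with any nonzero high word forcing an all-ones count, and the result is then assembled from the 32-bit halves using shifts that yield zero once the count reaches 32.

package main

import "fmt"

// zeromask mirrors the SSA Zeromask op: 0 if v == 0, all ones otherwise.
func zeromask(v uint32) uint32 {
	if v == 0 {
		return 0
	}
	return 0xFFFFFFFF
}

// shr32 and shl32 emulate the generic 32-bit SSA shifts the rules rely on:
// the result is 0 once the unsigned shift count reaches 32.
func shr32(x, s uint32) uint32 {
	if s >= 32 {
		return 0
	}
	return x >> s
}

func shl32(x, s uint32) uint32 {
	if s >= 32 {
		return 0
	}
	return x << s
}

// rsh64Ux64 follows the rewrites: clamp the 64-bit count with
// Or32(Zeromask(hi(y)), lo(y)), then build the result halves as in the
// (Rsh64Ux32 x s) expansion.
func rsh64Ux64(x, y uint64) uint64 {
	xhi, xlo := uint32(x>>32), uint32(x)
	s := zeromask(uint32(y>>32)) | uint32(y)
	hi := shr32(xhi, s)
	lo := shr32(xlo, s) | shl32(xhi, 32-s) | shr32(xhi, s-32)
	return uint64(hi)<<32 | uint64(lo)
}

func main() {
	x := uint64(0x123456789ABCDEF0)
	for _, y := range []uint64{0, 1, 31, 32, 33, 63, 64, 1 << 40} {
		// Expect true for every count, including counts of 64 and above,
		// which is exactly the property the Zeromask clamp preserves.
		fmt.Println(y, rsh64Ux64(x, y) == x>>y)
	}
}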