Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: avoid generating CSEs; do all aggregates; maintain debug names
author David Chase <drchase@google.com>
Mon, 17 Aug 2020 20:57:22 +0000 (16:57 -0400)
committer David Chase <drchase@google.com>
Fri, 23 Oct 2020 18:02:16 +0000 (18:02 +0000)
This adds a pass to detect common selection operations,
to avoid generating duplicates.  Duplicate offsets are
also detected.

All aggregate types are now handled; there is some freedom in where
expand_calls is run, though it must run before softfloat.

Debug-name-maintenance is now incremental both in decompose builtin
and in expand_calls; it might be good to push this into all the
decompose passes.

(this is a smash of 5 CLs that rewrote some of the same code several
times to deal with phase-ordering problems, and included an abandoned
attempt.)

For #40724.

Change-Id: I2a0c32f20660bf8b99e2bcecd33545d97d2bd3c6
Reviewed-on: https://go-review.googlesource.com/c/go/+/249458
Trust: David Chase <drchase@google.com>
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/cmd/compile/fmtmap_test.go
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ssa/compile.go
src/cmd/compile/internal/ssa/config.go
src/cmd/compile/internal/ssa/decompose.go
src/cmd/compile/internal/ssa/expand_calls.go
src/cmd/compile/internal/ssa/export_test.go
src/cmd/compile/internal/ssa/gen/dec64.rules
src/cmd/compile/internal/ssa/rewritedec64.go

index 179c60187fe9d6328fe250fa31c6478d0b9c8446..f8c33ec1f9b21ff7453f113729bb09ab02e32e8a 100644 (file)
@@ -136,6 +136,7 @@ var knownFormats = map[string]string{
        "cmd/compile/internal/types.EType %s":             "",
        "cmd/compile/internal/types.EType %v":             "",
        "cmd/internal/obj.ABI %v":                         "",
+       "cmd/internal/src.XPos %v":                        "",
        "error %v":                                        "",
        "float64 %.2f":                                    "",
        "float64 %.3f":                                    "",
index 979a092ba1407a636286d9be419316eb84f85b22..f840ef40663858e0aac0469bd03732ee82c26886 100644 (file)
@@ -4740,7 +4740,7 @@ func (s *state) getClosureAndRcvr(fn *Node) (*ssa.Value, *ssa.Value) {
        s.nilCheck(itab)
        itabidx := fn.Xoffset + 2*int64(Widthptr) + 8 // offset of fun field in runtime.itab
        closure := s.newValue1I(ssa.OpOffPtr, s.f.Config.Types.UintptrPtr, itabidx, itab)
-       rcvr := s.newValue1(ssa.OpIData, types.Types[TUINTPTR], i)
+       rcvr := s.newValue1(ssa.OpIData, s.f.Config.Types.BytePtr, i)
        return closure, rcvr
 }
 
@@ -6904,56 +6904,38 @@ func (e *ssafn) Auto(pos src.XPos, t *types.Type) ssa.GCNode {
 }
 
 func (e *ssafn) SplitString(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) {
-       n := name.N.(*Node)
        ptrType := types.NewPtr(types.Types[TUINT8])
        lenType := types.Types[TINT]
-       if n.Class() == PAUTO && !n.Name.Addrtaken() {
-               // Split this string up into two separate variables.
-               p := e.splitSlot(&name, ".ptr", 0, ptrType)
-               l := e.splitSlot(&name, ".len", ptrType.Size(), lenType)
-               return p, l
-       }
-       // Return the two parts of the larger variable.
-       return ssa.LocalSlot{N: n, Type: ptrType, Off: name.Off}, ssa.LocalSlot{N: n, Type: lenType, Off: name.Off + int64(Widthptr)}
+       // Split this string up into two separate variables.
+       p := e.SplitSlot(&name, ".ptr", 0, ptrType)
+       l := e.SplitSlot(&name, ".len", ptrType.Size(), lenType)
+       return p, l
 }
 
 func (e *ssafn) SplitInterface(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) {
        n := name.N.(*Node)
        u := types.Types[TUINTPTR]
        t := types.NewPtr(types.Types[TUINT8])
-       if n.Class() == PAUTO && !n.Name.Addrtaken() {
-               // Split this interface up into two separate variables.
-               f := ".itab"
-               if n.Type.IsEmptyInterface() {
-                       f = ".type"
-               }
-               c := e.splitSlot(&name, f, 0, u) // see comment in plive.go:onebitwalktype1.
-               d := e.splitSlot(&name, ".data", u.Size(), t)
-               return c, d
+       // Split this interface up into two separate variables.
+       f := ".itab"
+       if n.Type.IsEmptyInterface() {
+               f = ".type"
        }
-       // Return the two parts of the larger variable.
-       return ssa.LocalSlot{N: n, Type: u, Off: name.Off}, ssa.LocalSlot{N: n, Type: t, Off: name.Off + int64(Widthptr)}
+       c := e.SplitSlot(&name, f, 0, u) // see comment in plive.go:onebitwalktype1.
+       d := e.SplitSlot(&name, ".data", u.Size(), t)
+       return c, d
 }
 
 func (e *ssafn) SplitSlice(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot, ssa.LocalSlot) {
-       n := name.N.(*Node)
        ptrType := types.NewPtr(name.Type.Elem())
        lenType := types.Types[TINT]
-       if n.Class() == PAUTO && !n.Name.Addrtaken() {
-               // Split this slice up into three separate variables.
-               p := e.splitSlot(&name, ".ptr", 0, ptrType)
-               l := e.splitSlot(&name, ".len", ptrType.Size(), lenType)
-               c := e.splitSlot(&name, ".cap", ptrType.Size()+lenType.Size(), lenType)
-               return p, l, c
-       }
-       // Return the three parts of the larger variable.
-       return ssa.LocalSlot{N: n, Type: ptrType, Off: name.Off},
-               ssa.LocalSlot{N: n, Type: lenType, Off: name.Off + int64(Widthptr)},
-               ssa.LocalSlot{N: n, Type: lenType, Off: name.Off + int64(2*Widthptr)}
+       p := e.SplitSlot(&name, ".ptr", 0, ptrType)
+       l := e.SplitSlot(&name, ".len", ptrType.Size(), lenType)
+       c := e.SplitSlot(&name, ".cap", ptrType.Size()+lenType.Size(), lenType)
+       return p, l, c
 }
 
 func (e *ssafn) SplitComplex(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) {
-       n := name.N.(*Node)
        s := name.Type.Size() / 2
        var t *types.Type
        if s == 8 {
@@ -6961,53 +6943,35 @@ func (e *ssafn) SplitComplex(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot)
        } else {
                t = types.Types[TFLOAT32]
        }
-       if n.Class() == PAUTO && !n.Name.Addrtaken() {
-               // Split this complex up into two separate variables.
-               r := e.splitSlot(&name, ".real", 0, t)
-               i := e.splitSlot(&name, ".imag", t.Size(), t)
-               return r, i
-       }
-       // Return the two parts of the larger variable.
-       return ssa.LocalSlot{N: n, Type: t, Off: name.Off}, ssa.LocalSlot{N: n, Type: t, Off: name.Off + s}
+       r := e.SplitSlot(&name, ".real", 0, t)
+       i := e.SplitSlot(&name, ".imag", t.Size(), t)
+       return r, i
 }
 
 func (e *ssafn) SplitInt64(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) {
-       n := name.N.(*Node)
        var t *types.Type
        if name.Type.IsSigned() {
                t = types.Types[TINT32]
        } else {
                t = types.Types[TUINT32]
        }
-       if n.Class() == PAUTO && !n.Name.Addrtaken() {
-               // Split this int64 up into two separate variables.
-               if thearch.LinkArch.ByteOrder == binary.BigEndian {
-                       return e.splitSlot(&name, ".hi", 0, t), e.splitSlot(&name, ".lo", t.Size(), types.Types[TUINT32])
-               }
-               return e.splitSlot(&name, ".hi", t.Size(), t), e.splitSlot(&name, ".lo", 0, types.Types[TUINT32])
-       }
-       // Return the two parts of the larger variable.
        if thearch.LinkArch.ByteOrder == binary.BigEndian {
-               return ssa.LocalSlot{N: n, Type: t, Off: name.Off}, ssa.LocalSlot{N: n, Type: types.Types[TUINT32], Off: name.Off + 4}
+               return e.SplitSlot(&name, ".hi", 0, t), e.SplitSlot(&name, ".lo", t.Size(), types.Types[TUINT32])
        }
-       return ssa.LocalSlot{N: n, Type: t, Off: name.Off + 4}, ssa.LocalSlot{N: n, Type: types.Types[TUINT32], Off: name.Off}
+       return e.SplitSlot(&name, ".hi", t.Size(), t), e.SplitSlot(&name, ".lo", 0, types.Types[TUINT32])
 }
 
 func (e *ssafn) SplitStruct(name ssa.LocalSlot, i int) ssa.LocalSlot {
-       n := name.N.(*Node)
        st := name.Type
        ft := st.FieldType(i)
        var offset int64
        for f := 0; f < i; f++ {
                offset += st.FieldType(f).Size()
        }
-       if n.Class() == PAUTO && !n.Name.Addrtaken() {
-               // Note: the _ field may appear several times.  But
-               // have no fear, identically-named but distinct Autos are
-               // ok, albeit maybe confusing for a debugger.
-               return e.splitSlot(&name, "."+st.FieldName(i), offset, ft)
-       }
-       return ssa.LocalSlot{N: n, Type: ft, Off: name.Off + st.FieldOff(i)}
+       // Note: the _ field may appear several times.  But
+       // have no fear, identically-named but distinct Autos are
+       // ok, albeit maybe confusing for a debugger.
+       return e.SplitSlot(&name, "."+st.FieldName(i), offset, ft)
 }
 
 func (e *ssafn) SplitArray(name ssa.LocalSlot) ssa.LocalSlot {
@@ -7017,19 +6981,23 @@ func (e *ssafn) SplitArray(name ssa.LocalSlot) ssa.LocalSlot {
                e.Fatalf(n.Pos, "bad array size")
        }
        et := at.Elem()
-       if n.Class() == PAUTO && !n.Name.Addrtaken() {
-               return e.splitSlot(&name, "[0]", 0, et)
-       }
-       return ssa.LocalSlot{N: n, Type: et, Off: name.Off}
+       return e.SplitSlot(&name, "[0]", 0, et)
 }
 
 func (e *ssafn) DerefItab(it *obj.LSym, offset int64) *obj.LSym {
        return itabsym(it, offset)
 }
 
-// splitSlot returns a slot representing the data of parent starting at offset.
-func (e *ssafn) splitSlot(parent *ssa.LocalSlot, suffix string, offset int64, t *types.Type) ssa.LocalSlot {
-       s := &types.Sym{Name: parent.N.(*Node).Sym.Name + suffix, Pkg: localpkg}
+// SplitSlot returns a slot representing the data of parent starting at offset.
+func (e *ssafn) SplitSlot(parent *ssa.LocalSlot, suffix string, offset int64, t *types.Type) ssa.LocalSlot {
+       node := parent.N.(*Node)
+
+       if node.Class() != PAUTO || node.Name.Addrtaken() {
+               // addressed things and non-autos retain their parents (i.e., cannot truly be split)
+               return ssa.LocalSlot{N: node, Type: t, Off: parent.Off + offset}
+       }
+
+       s := &types.Sym{Name: node.Sym.Name + suffix, Pkg: localpkg}
 
        n := &Node{
                Name: new(Name),
index 0664c0ba466be73688525a08e34564acb2ca22de..bddd271273a9a306cd9805452a09ddbd775b79ca 100644 (file)
@@ -441,8 +441,8 @@ var passes = [...]pass{
        {name: "nilcheckelim", fn: nilcheckelim},
        {name: "prove", fn: prove},
        {name: "early fuse", fn: fuseEarly},
-       {name: "expand calls", fn: expandCalls, required: true},
        {name: "decompose builtin", fn: decomposeBuiltIn, required: true},
+       {name: "expand calls", fn: expandCalls, required: true},
        {name: "softfloat", fn: softfloat, required: true},
        {name: "late opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules
        {name: "dead auto elim", fn: elimDeadAutosGeneric},
index f1a748309c66e51603fe1aeb0e127c6c2a15c32b..cb6f6fe7a14a208c9db5c88786f3768fd5c6fc08 100644 (file)
@@ -149,6 +149,7 @@ type Frontend interface {
        SplitStruct(LocalSlot, int) LocalSlot
        SplitArray(LocalSlot) LocalSlot              // array must be length 1
        SplitInt64(LocalSlot) (LocalSlot, LocalSlot) // returns (hi, lo)
+       SplitSlot(parent *LocalSlot, suffix string, offset int64, t *types.Type) LocalSlot
 
        // DerefItab dereferences an itab function
        // entry, given the symbol of the itab and
index ab27ba85ae789ea84e6368e9a20e5d74dc6ce6e1..bf7f1e826b17ee66fae6d219d54cb037be25d496 100644 (file)
@@ -6,6 +6,7 @@ package ssa
 
 import (
        "cmd/compile/internal/types"
+       "sort"
 )
 
 // decompose converts phi ops on compound builtin types into phi
@@ -31,77 +32,79 @@ func decomposeBuiltIn(f *Func) {
        }
 
        // Split up named values into their components.
+       // accumulate old names for aggregates (that are decomposed) in toDelete for efficient bulk deletion,
+       // accumulate new LocalSlots in newNames for addition after the iteration.  This decomposition is for
+       // builtin types with leaf components, and thus there is no need to reprocess the newly create LocalSlots.
+       var toDelete []namedVal
        var newNames []LocalSlot
-       for _, name := range f.Names {
+       for i, name := range f.Names {
                t := name.Type
                switch {
                case t.IsInteger() && t.Size() > f.Config.RegSize:
                        hiName, loName := f.fe.SplitInt64(name)
                        newNames = append(newNames, hiName, loName)
-                       for _, v := range f.NamedValues[name] {
+                       for j, v := range f.NamedValues[name] {
                                if v.Op != OpInt64Make {
                                        continue
                                }
                                f.NamedValues[hiName] = append(f.NamedValues[hiName], v.Args[0])
                                f.NamedValues[loName] = append(f.NamedValues[loName], v.Args[1])
+                               toDelete = append(toDelete, namedVal{i, j})
                        }
-                       delete(f.NamedValues, name)
                case t.IsComplex():
                        rName, iName := f.fe.SplitComplex(name)
                        newNames = append(newNames, rName, iName)
-                       for _, v := range f.NamedValues[name] {
+                       for j, v := range f.NamedValues[name] {
                                if v.Op != OpComplexMake {
                                        continue
                                }
                                f.NamedValues[rName] = append(f.NamedValues[rName], v.Args[0])
                                f.NamedValues[iName] = append(f.NamedValues[iName], v.Args[1])
-
+                               toDelete = append(toDelete, namedVal{i, j})
                        }
-                       delete(f.NamedValues, name)
                case t.IsString():
                        ptrName, lenName := f.fe.SplitString(name)
                        newNames = append(newNames, ptrName, lenName)
-                       for _, v := range f.NamedValues[name] {
+                       for j, v := range f.NamedValues[name] {
                                if v.Op != OpStringMake {
                                        continue
                                }
                                f.NamedValues[ptrName] = append(f.NamedValues[ptrName], v.Args[0])
                                f.NamedValues[lenName] = append(f.NamedValues[lenName], v.Args[1])
+                               toDelete = append(toDelete, namedVal{i, j})
                        }
-                       delete(f.NamedValues, name)
                case t.IsSlice():
                        ptrName, lenName, capName := f.fe.SplitSlice(name)
                        newNames = append(newNames, ptrName, lenName, capName)
-                       for _, v := range f.NamedValues[name] {
+                       for j, v := range f.NamedValues[name] {
                                if v.Op != OpSliceMake {
                                        continue
                                }
                                f.NamedValues[ptrName] = append(f.NamedValues[ptrName], v.Args[0])
                                f.NamedValues[lenName] = append(f.NamedValues[lenName], v.Args[1])
                                f.NamedValues[capName] = append(f.NamedValues[capName], v.Args[2])
+                               toDelete = append(toDelete, namedVal{i, j})
                        }
-                       delete(f.NamedValues, name)
                case t.IsInterface():
                        typeName, dataName := f.fe.SplitInterface(name)
                        newNames = append(newNames, typeName, dataName)
-                       for _, v := range f.NamedValues[name] {
+                       for j, v := range f.NamedValues[name] {
                                if v.Op != OpIMake {
                                        continue
                                }
                                f.NamedValues[typeName] = append(f.NamedValues[typeName], v.Args[0])
                                f.NamedValues[dataName] = append(f.NamedValues[dataName], v.Args[1])
+                               toDelete = append(toDelete, namedVal{i, j})
                        }
-                       delete(f.NamedValues, name)
                case t.IsFloat():
                        // floats are never decomposed, even ones bigger than RegSize
-                       newNames = append(newNames, name)
                case t.Size() > f.Config.RegSize:
                        f.Fatalf("undecomposed named type %s %v", name, t)
-               default:
-                       newNames = append(newNames, name)
                }
        }
-       f.Names = newNames
+
+       deleteNamedVals(f, toDelete)
+       f.Names = append(f.Names, newNames...)
 }
 
 func decomposeBuiltInPhi(v *Value) {
@@ -263,14 +266,20 @@ func decomposeUserArrayInto(f *Func, name LocalSlot, slots []LocalSlot) []LocalS
                f.Fatalf("array not of size 1")
        }
        elemName := f.fe.SplitArray(name)
+       var keep []*Value
        for _, v := range f.NamedValues[name] {
                if v.Op != OpArrayMake1 {
+                       keep = append(keep, v)
                        continue
                }
                f.NamedValues[elemName] = append(f.NamedValues[elemName], v.Args[0])
        }
-       // delete the name for the array as a whole
-       delete(f.NamedValues, name)
+       if len(keep) == 0 {
+               // delete the name for the array as a whole
+               delete(f.NamedValues, name)
+       } else {
+               f.NamedValues[name] = keep
+       }
 
        if t.Elem().IsArray() {
                return decomposeUserArrayInto(f, elemName, slots)
@@ -300,17 +309,23 @@ func decomposeUserStructInto(f *Func, name LocalSlot, slots []LocalSlot) []Local
        }
 
        makeOp := StructMakeOp(n)
+       var keep []*Value
        // create named values for each struct field
        for _, v := range f.NamedValues[name] {
                if v.Op != makeOp {
+                       keep = append(keep, v)
                        continue
                }
                for i := 0; i < len(fnames); i++ {
                        f.NamedValues[fnames[i]] = append(f.NamedValues[fnames[i]], v.Args[i])
                }
        }
-       // remove the name of the struct as a whole
-       delete(f.NamedValues, name)
+       if len(keep) == 0 {
+               // delete the name for the struct as a whole
+               delete(f.NamedValues, name)
+       } else {
+               f.NamedValues[name] = keep
+       }
 
        // now that this f.NamedValues contains values for the struct
        // fields, recurse into nested structs
@@ -400,3 +415,35 @@ func StructMakeOp(nf int) Op {
        }
        panic("too many fields in an SSAable struct")
 }
+
+type namedVal struct {
+       locIndex, valIndex int // f.NamedValues[f.Names[locIndex]][valIndex] = key
+}
+
+// deleteNamedVals removes particular values with debugger names from f's naming data structures
+func deleteNamedVals(f *Func, toDelete []namedVal) {
+       // Arrange to delete from larger indices to smaller, to ensure swap-with-end deletion does not invalid pending indices.
+       sort.Slice(toDelete, func(i, j int) bool {
+               if toDelete[i].locIndex != toDelete[j].locIndex {
+                       return toDelete[i].locIndex > toDelete[j].locIndex
+               }
+               return toDelete[i].valIndex > toDelete[j].valIndex
+
+       })
+
+       // Get rid of obsolete names
+       for _, d := range toDelete {
+               loc := f.Names[d.locIndex]
+               vals := f.NamedValues[loc]
+               l := len(vals) - 1
+               if l > 0 {
+                       vals[d.valIndex] = vals[l]
+                       f.NamedValues[loc] = vals[:l]
+               } else {
+                       delete(f.NamedValues, loc)
+                       l = len(f.Names) - 1
+                       f.Names[d.locIndex] = f.Names[l]
+                       f.Names = f.Names[:l]
+               }
+       }
+}
index bbd9aeee51249aa813463020f556c1b74310426f..3e3573ff394ce95d60de24471e0a10efdd30c2a4 100644 (file)
@@ -11,27 +11,44 @@ import (
        "sort"
 )
 
+type selKey struct {
+       from   *Value
+       offset int64
+       size   int64
+       typ    types.EType
+}
+
+type offsetKey struct {
+       from   *Value
+       offset int64
+       pt     *types.Type
+}
+
 // expandCalls converts LE (Late Expansion) calls that act like they receive value args into a lower-level form
 // that is more oriented to a platform's ABI.  The SelectN operations that extract results are rewritten into
 // more appropriate forms, and any StructMake or ArrayMake inputs are decomposed until non-struct values are
-// reached (for now, Strings, Slices, Complex, and Interface are not decomposed because they are rewritten in
-// a subsequent phase, but that may need to change for a register ABI in case one of those composite values is
-// split between registers and memory).
-//
-// TODO: when it comes time to use registers, might want to include builtin selectors as well, but currently that happens in lower.
+// reached.
 func expandCalls(f *Func) {
+       // Calls that need lowering have some number of inputs, including a memory input,
+       // and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able.
+
+       // With the current ABI those inputs need to be converted into stores to memory,
+       // rethreading the call's memory input to the first, and the new call now receiving the last.
+
+       // With the current ABI, the outputs need to be converted to loads, which will all use the call's
+       // memory output as their input.
        if !LateCallExpansionEnabledWithin(f) {
                return
        }
+       debug := f.pass.debug > 0
+
        canSSAType := f.fe.CanSSA
        regSize := f.Config.RegSize
        sp, _ := f.spSb()
+       typ := &f.Config.Types
+       ptrSize := f.Config.PtrSize
 
-       debug := f.pass.debug > 0
-
-       // For 32-bit, need to deal with decomposition of 64-bit integers
-       tUint32 := types.Types[types.TUINT32]
-       tInt32 := types.Types[types.TINT32]
+       // For 32-bit, need to deal with decomposition of 64-bit integers, which depends on endianness.
        var hiOffset, lowOffset int64
        if f.Config.BigEndian {
                lowOffset = 4
@@ -39,25 +56,63 @@ func expandCalls(f *Func) {
                hiOffset = 4
        }
 
+       namedSelects := make(map[*Value][]namedVal)
+
        // intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target
        // that has no 64-bit integer registers.
        intPairTypes := func(et types.EType) (tHi, tLo *types.Type) {
-               tHi = tUint32
+               tHi = typ.UInt32
                if et == types.TINT64 {
-                       tHi = tInt32
+                       tHi = typ.Int32
                }
-               tLo = tUint32
+               tLo = typ.UInt32
                return
        }
 
        // isAlreadyExpandedAggregateType returns whether a type is an SSA-able "aggregate" (multiple register) type
-       // that was expanded in an earlier phase (small user-defined arrays and structs, lowered in decomposeUser).
-       // Other aggregate types are expanded in decomposeBuiltin, which comes later.
+       // that was expanded in an earlier phase (currently, expand_calls is intended to run after decomposeBuiltin,
+       // so this is all aggregate types -- small struct and array, complex, interface, string, slice, and 64-bit
+       // integer on 32-bit).
        isAlreadyExpandedAggregateType := func(t *types.Type) bool {
                if !canSSAType(t) {
                        return false
                }
-               return t.IsStruct() || t.IsArray() || regSize == 4 && t.Size() > 4 && t.IsInteger()
+               return t.IsStruct() || t.IsArray() || t.IsComplex() || t.IsInterface() || t.IsString() || t.IsSlice() ||
+                       t.Size() > regSize && t.IsInteger()
+       }
+
+       offsets := make(map[offsetKey]*Value)
+
+       // offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP
+       // TODO should also optimize offsets from SB?
+       offsetFrom := func(from *Value, offset int64, pt *types.Type) *Value {
+               if offset == 0 && from.Type == pt { // this is not actually likely
+                       return from
+               }
+               // Simplify, canonicalize
+               for from.Op == OpOffPtr {
+                       offset += from.AuxInt
+                       from = from.Args[0]
+               }
+               if from == sp {
+                       return f.ConstOffPtrSP(pt, offset, sp)
+               }
+               key := offsetKey{from, offset, pt}
+               v := offsets[key]
+               if v != nil {
+                       return v
+               }
+               v = from.Block.NewValue1I(from.Pos.WithNotStmt(), OpOffPtr, pt, offset, from)
+               offsets[key] = v
+               return v
+       }
+
+       splitSlots := func(ls []LocalSlot, sfx string, offset int64, ty *types.Type) []LocalSlot {
+               var locs []LocalSlot
+               for i := range ls {
+                       locs = append(locs, f.fe.SplitSlot(&ls[i], sfx, offset, ty))
+               }
+               return locs
        }
 
        // removeTrivialWrapperTypes unwraps layers of
@@ -97,11 +152,16 @@ func expandCalls(f *Func) {
        // end in OpSelectN, it does nothing (this can happen depending on compiler phase ordering).
        // It emits the code necessary to implement the leaf select operation that leads to the call.
        // TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
-       var rewriteSelect func(leaf *Value, selector *Value, offset int64)
-       rewriteSelect = func(leaf *Value, selector *Value, offset int64) {
+       var rewriteSelect func(leaf *Value, selector *Value, offset int64) []LocalSlot
+       rewriteSelect = func(leaf *Value, selector *Value, offset int64) []LocalSlot {
+               var locs []LocalSlot
+               leafType := leaf.Type
                switch selector.Op {
                case OpSelectN:
                        // TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there.
+                       for _, s := range namedSelects[selector] {
+                               locs = append(locs, f.Names[s.locIndex])
+                       }
                        call := selector.Args[0]
                        aux := call.Aux.(*AuxCall)
                        which := selector.AuxInt
@@ -110,9 +170,13 @@ func expandCalls(f *Func) {
                                leaf.copyOf(call)
                        } else {
                                leafType := removeTrivialWrapperTypes(leaf.Type)
-                               pt := types.NewPtr(leafType)
                                if canSSAType(leafType) {
-                                       off := f.ConstOffPtrSP(pt, offset+aux.OffsetOfResult(which), sp)
+                                       for leafType.Etype == types.TSTRUCT && leafType.NumFields() == 1 {
+                                               // This may not be adequately general -- consider [1]etc but this is caused by immediate IDATA
+                                               leafType = leafType.Field(0).Type
+                                       }
+                                       pt := types.NewPtr(leafType)
+                                       off := offsetFrom(sp, offset+aux.OffsetOfResult(which), pt)
                                        // Any selection right out of the arg area/registers has to be same Block as call, use call as mem input.
                                        if leaf.Block == call.Block {
                                                leaf.reset(OpLoad)
@@ -123,46 +187,110 @@ func expandCalls(f *Func) {
                                                leaf.copyOf(w)
                                        }
                                } else {
-                                       panic("Should not have non-SSA-able OpSelectN")
+                                       f.Fatalf("Should not have non-SSA-able OpSelectN, selector=%s", selector.LongString())
                                }
                        }
                case OpStructSelect:
                        w := selector.Args[0]
+                       var ls []LocalSlot
                        if w.Type.Etype != types.TSTRUCT {
-                               fmt.Printf("Bad type for w:\nv=%v\nsel=%v\nw=%v\n,f=%s\n", leaf.LongString(), selector.LongString(), w.LongString(), f.Name)
+                               f.Fatalf("Bad type for w: v=%v; sel=%v; w=%v; ,f=%s\n", leaf.LongString(), selector.LongString(), w.LongString(), f.Name)
+                               // Artifact of immediate interface idata
+                               ls = rewriteSelect(leaf, w, offset)
+                       } else {
+                               ls = rewriteSelect(leaf, w, offset+w.Type.FieldOff(int(selector.AuxInt)))
+                               for _, l := range ls {
+                                       locs = append(locs, f.fe.SplitStruct(l, int(selector.AuxInt)))
+                               }
                        }
-                       rewriteSelect(leaf, w, offset+w.Type.FieldOff(int(selector.AuxInt)))
+
+               case OpArraySelect:
+                       w := selector.Args[0]
+                       rewriteSelect(leaf, w, offset+selector.Type.Size()*selector.AuxInt)
 
                case OpInt64Hi:
                        w := selector.Args[0]
-                       rewriteSelect(leaf, w, offset+hiOffset)
+                       ls := rewriteSelect(leaf, w, offset+hiOffset)
+                       locs = splitSlots(ls, ".hi", hiOffset, leafType)
 
                case OpInt64Lo:
                        w := selector.Args[0]
-                       rewriteSelect(leaf, w, offset+lowOffset)
+                       ls := rewriteSelect(leaf, w, offset+lowOffset)
+                       locs = splitSlots(ls, ".lo", lowOffset, leafType)
 
-               case OpArraySelect:
+               case OpStringPtr:
+                       ls := rewriteSelect(leaf, selector.Args[0], offset)
+                       locs = splitSlots(ls, ".ptr", 0, typ.BytePtr)
+                       //for i := range ls {
+                       //      locs = append(locs, f.fe.SplitSlot(&ls[i], ".ptr", 0, typ.BytePtr))
+                       //}
+               case OpSlicePtr:
                        w := selector.Args[0]
-                       rewriteSelect(leaf, w, offset+selector.Type.Size()*selector.AuxInt)
+                       ls := rewriteSelect(leaf, w, offset)
+                       locs = splitSlots(ls, ".ptr", 0, types.NewPtr(w.Type.Elem()))
+
+               case OpITab:
+                       w := selector.Args[0]
+                       ls := rewriteSelect(leaf, w, offset)
+                       sfx := ".itab"
+                       if w.Type.IsEmptyInterface() {
+                               sfx = ".type"
+                       }
+                       locs = splitSlots(ls, sfx, 0, typ.Uintptr)
+
+               case OpComplexReal:
+                       ls := rewriteSelect(leaf, selector.Args[0], offset)
+                       locs = splitSlots(ls, ".real", 0, leafType)
+
+               case OpComplexImag:
+                       ls := rewriteSelect(leaf, selector.Args[0], offset+leafType.Width) // result is FloatNN, width of result is offset of imaginary part.
+                       locs = splitSlots(ls, ".imag", leafType.Width, leafType)
+
+               case OpStringLen, OpSliceLen:
+                       ls := rewriteSelect(leaf, selector.Args[0], offset+ptrSize)
+                       locs = splitSlots(ls, ".len", ptrSize, leafType)
+
+               case OpIData:
+                       ls := rewriteSelect(leaf, selector.Args[0], offset+ptrSize)
+                       locs = splitSlots(ls, ".data", ptrSize, leafType)
+
+               case OpSliceCap:
+                       ls := rewriteSelect(leaf, selector.Args[0], offset+2*ptrSize)
+                       locs = splitSlots(ls, ".cap", 2*ptrSize, leafType)
+
+               case OpCopy: // If it's an intermediate result, recurse
+                       locs = rewriteSelect(leaf, selector.Args[0], offset)
+                       for _, s := range namedSelects[selector] {
+                               // this copy may have had its own name, preserve that, too.
+                               locs = append(locs, f.Names[s.locIndex])
+                       }
+
                default:
-                       // Ignore dead ends; on 32-bit, these can occur running before decompose builtins.
+                       // Ignore dead ends. These can occur if this phase is run before decompose builtin (which is not intended, but allowed).
                }
+
+               return locs
        }
 
        // storeArg converts stores of SSA-able aggregate arguments (passed to a call) into a series of stores of
        // smaller types into individual parameter slots.
-       // TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
        var storeArg func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value
        storeArg = func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value {
+               if debug {
+                       fmt.Printf("\tstoreArg(%s;  %s;  %v;  %d;  %s)\n", b, a.LongString(), t, offset, mem.String())
+               }
+
                switch a.Op {
                case OpArrayMake0, OpStructMake0:
                        return mem
+
                case OpStructMake1, OpStructMake2, OpStructMake3, OpStructMake4:
                        for i := 0; i < t.NumFields(); i++ {
                                fld := t.Field(i)
                                mem = storeArg(pos, b, a.Args[i], fld.Type, offset+fld.Offset, mem)
                        }
                        return mem
+
                case OpArrayMake1:
                        return storeArg(pos, b, a.Args[0], t.Elem(), offset, mem)
 
@@ -170,55 +298,51 @@ func expandCalls(f *Func) {
                        tHi, tLo := intPairTypes(t.Etype)
                        mem = storeArg(pos, b, a.Args[0], tHi, offset+hiOffset, mem)
                        return storeArg(pos, b, a.Args[1], tLo, offset+lowOffset, mem)
+
+               case OpComplexMake:
+                       tPart := typ.Float32
+                       wPart := t.Width / 2
+                       if wPart == 8 {
+                               tPart = typ.Float64
+                       }
+                       mem = storeArg(pos, b, a.Args[0], tPart, offset, mem)
+                       return storeArg(pos, b, a.Args[1], tPart, offset+wPart, mem)
+
+               case OpIMake:
+                       mem = storeArg(pos, b, a.Args[0], typ.Uintptr, offset, mem)
+                       return storeArg(pos, b, a.Args[1], typ.BytePtr, offset+ptrSize, mem)
+
+               case OpStringMake:
+                       mem = storeArg(pos, b, a.Args[0], typ.BytePtr, offset, mem)
+                       return storeArg(pos, b, a.Args[1], typ.Int, offset+ptrSize, mem)
+
+               case OpSliceMake:
+                       mem = storeArg(pos, b, a.Args[0], typ.BytePtr, offset, mem)
+                       mem = storeArg(pos, b, a.Args[1], typ.Int, offset+ptrSize, mem)
+                       return storeArg(pos, b, a.Args[2], typ.Int, offset+2*ptrSize, mem)
                }
-               dst := f.ConstOffPtrSP(types.NewPtr(t), offset, sp)
+
+               dst := offsetFrom(sp, offset, types.NewPtr(t))
                x := b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, a, mem)
                if debug {
-                       fmt.Printf("storeArg(%v) returns %s\n", a, x.LongString())
+                       fmt.Printf("\t\tstoreArg returns %s\n", x.LongString())
                }
                return x
        }
 
-       // offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP
-       // TODO should also optimize offsets from SB?
-       offsetFrom := func(dst *Value, offset int64, t *types.Type) *Value {
-               pt := types.NewPtr(t)
-               if offset == 0 && dst.Type == pt { // this is not actually likely
-                       return dst
-               }
-               if dst.Op != OpOffPtr {
-                       return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, dst)
-               }
-               // Simplify OpOffPtr
-               from := dst.Args[0]
-               offset += dst.AuxInt
-               if from == sp {
-                       return f.ConstOffPtrSP(pt, offset, sp)
-               }
-               return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, from)
-       }
-
        // splitStore converts a store of an SSA-able aggregate into a series of smaller stores, emitting
        // appropriate Struct/Array Select operations (which will soon go dead) to obtain the parts.
-       var splitStore func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value
-       splitStore = func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value {
-               // TODO might be worth commoning up duplicate selectors, but since they go dead, maybe no point.
+       // This has to handle aggregate types that have already been lowered by an earlier phase.
+       var splitStore func(dest, source, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value
+       splitStore = func(dest, source, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value {
+               if debug {
+                       fmt.Printf("\tsplitStore(%s;  %s;  %s;  %s;  %v;  %d;  %v)\n", dest.LongString(), source.LongString(), mem.String(), v.LongString(), t, offset, firstStorePos)
+               }
                pos := v.Pos.WithNotStmt()
                switch t.Etype {
-               case types.TINT64, types.TUINT64:
-                       if t.Width == regSize {
-                               break
-                       }
-                       tHi, tLo := intPairTypes(t.Etype)
-                       sel := src.Block.NewValue1(pos, OpInt64Hi, tHi, src)
-                       mem = splitStore(dst, sel, mem, v, tHi, offset+hiOffset, firstStorePos)
-                       firstStorePos = firstStorePos.WithNotStmt()
-                       sel = src.Block.NewValue1(pos, OpInt64Lo, tLo, src)
-                       return splitStore(dst, sel, mem, v, tLo, offset+lowOffset, firstStorePos)
-
                case types.TARRAY:
                        elt := t.Elem()
-                       if src.Op == OpIData && t.NumElem() == 1 && t.Width == regSize && elt.Width == regSize {
+                       if t.NumElem() == 1 && t.Width == regSize && elt.Width == regSize {
                                t = removeTrivialWrapperTypes(t)
                                if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY {
                                        f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct/array in it")
@@ -226,13 +350,14 @@ func expandCalls(f *Func) {
                                break // handle the leaf type.
                        }
                        for i := int64(0); i < t.NumElem(); i++ {
-                               sel := src.Block.NewValue1I(pos, OpArraySelect, elt, i, src)
-                               mem = splitStore(dst, sel, mem, v, elt, offset+i*elt.Width, firstStorePos)
+                               sel := source.Block.NewValue1I(pos, OpArraySelect, elt, i, source)
+                               mem = splitStore(dest, sel, mem, v, elt, offset+i*elt.Width, firstStorePos)
                                firstStorePos = firstStorePos.WithNotStmt()
                        }
                        return mem
+
                case types.TSTRUCT:
-                       if src.Op == OpIData && t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize {
+                       if t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize {
                                // This peculiar test deals with accesses to immediate interface data.
                                // It works okay because everything is the same size.
                                // Example code that triggers this can be found in go/constant/value.go, function ToComplex
@@ -240,26 +365,87 @@ func expandCalls(f *Func) {
                                // v121 (+882) = StaticLECall <floatVal,mem> {AuxCall{"".itof([intVal,0])[floatVal,8]}} [16] v119 v1
                                // This corresponds to the generic rewrite rule "(StructSelect [0] (IData x)) => (IData x)"
                                // Guard against "struct{struct{*foo}}"
+                               // Other rewriting phases create minor glitches when they transform IData, for instance the
+                               // interface-typed Arg "x" of ToFloat in go/constant/value.go
+                               //   v6 (858) = Arg <Value> {x} (x[Value], x[Value])
+                               // is rewritten by decomposeArgs into
+                               //   v141 (858) = Arg <uintptr> {x}
+                               //   v139 (858) = Arg <*uint8> {x} [8]
+                               // because of a type case clause on line 862 of go/constant/value.go
+                               //      case intVal:
+                               //                 return itof(x)
+                               // v139 is later stored as an intVal == struct{val *big.Int} which naively requires the fields of
+                               // a *uint8, which does not succeed.
                                t = removeTrivialWrapperTypes(t)
-                               if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY {
-                                       f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct/array in it")
-                               }
-                               break // handle the leaf type.
+
+                               // it could be a leaf type, but the "leaf" could be complex64 (for example)
+                               return splitStore(dest, source, mem, v, t, offset, firstStorePos)
                        }
+
                        for i := 0; i < t.NumFields(); i++ {
                                fld := t.Field(i)
-                               sel := src.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), src)
-                               mem = splitStore(dst, sel, mem, v, fld.Type, offset+fld.Offset, firstStorePos)
+                               sel := source.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), source)
+                               mem = splitStore(dest, sel, mem, v, fld.Type, offset+fld.Offset, firstStorePos)
                                firstStorePos = firstStorePos.WithNotStmt()
                        }
                        return mem
+
+               case types.TINT64, types.TUINT64:
+                       if t.Width == regSize {
+                               break
+                       }
+                       tHi, tLo := intPairTypes(t.Etype)
+                       sel := source.Block.NewValue1(pos, OpInt64Hi, tHi, source)
+                       mem = splitStore(dest, sel, mem, v, tHi, offset+hiOffset, firstStorePos)
+                       firstStorePos = firstStorePos.WithNotStmt()
+                       sel = source.Block.NewValue1(pos, OpInt64Lo, tLo, source)
+                       return splitStore(dest, sel, mem, v, tLo, offset+lowOffset, firstStorePos)
+
+               case types.TINTER:
+                       sel := source.Block.NewValue1(pos, OpITab, typ.BytePtr, source)
+                       mem = splitStore(dest, sel, mem, v, typ.BytePtr, offset, firstStorePos)
+                       firstStorePos = firstStorePos.WithNotStmt()
+                       sel = source.Block.NewValue1(pos, OpIData, typ.BytePtr, source)
+                       return splitStore(dest, sel, mem, v, typ.BytePtr, offset+ptrSize, firstStorePos)
+
+               case types.TSTRING:
+                       sel := source.Block.NewValue1(pos, OpStringPtr, typ.BytePtr, source)
+                       mem = splitStore(dest, sel, mem, v, typ.BytePtr, offset, firstStorePos)
+                       firstStorePos = firstStorePos.WithNotStmt()
+                       sel = source.Block.NewValue1(pos, OpStringLen, typ.Int, source)
+                       return splitStore(dest, sel, mem, v, typ.Int, offset+ptrSize, firstStorePos)
+
+               case types.TSLICE:
+                       et := types.NewPtr(t.Elem())
+                       sel := source.Block.NewValue1(pos, OpSlicePtr, et, source)
+                       mem = splitStore(dest, sel, mem, v, et, offset, firstStorePos)
+                       firstStorePos = firstStorePos.WithNotStmt()
+                       sel = source.Block.NewValue1(pos, OpSliceLen, typ.Int, source)
+                       mem = splitStore(dest, sel, mem, v, typ.Int, offset+ptrSize, firstStorePos)
+                       sel = source.Block.NewValue1(pos, OpSliceCap, typ.Int, source)
+                       return splitStore(dest, sel, mem, v, typ.Int, offset+2*ptrSize, firstStorePos)
+
+               case types.TCOMPLEX64:
+                       sel := source.Block.NewValue1(pos, OpComplexReal, typ.Float32, source)
+                       mem = splitStore(dest, sel, mem, v, typ.Float32, offset, firstStorePos)
+                       firstStorePos = firstStorePos.WithNotStmt()
+                       sel = source.Block.NewValue1(pos, OpComplexImag, typ.Float32, source)
+                       return splitStore(dest, sel, mem, v, typ.Float32, offset+4, firstStorePos)
+
+               case types.TCOMPLEX128:
+                       sel := source.Block.NewValue1(pos, OpComplexReal, typ.Float64, source)
+                       mem = splitStore(dest, sel, mem, v, typ.Float64, offset, firstStorePos)
+                       firstStorePos = firstStorePos.WithNotStmt()
+                       sel = source.Block.NewValue1(pos, OpComplexImag, typ.Float64, source)
+                       return splitStore(dest, sel, mem, v, typ.Float64, offset+8, firstStorePos)
                }
                // Default, including for aggregates whose single element exactly fills their container
                // TODO this will be a problem for cast interfaces containing floats when we move to registers.
-               x := v.Block.NewValue3A(firstStorePos, OpStore, types.TypeMem, t, offsetFrom(dst, offset, t), src, mem)
+               x := v.Block.NewValue3A(firstStorePos, OpStore, types.TypeMem, t, offsetFrom(dest, offset, types.NewPtr(t)), source, mem)
                if debug {
-                       fmt.Printf("splitStore(%v, %v, %v, %v) returns %s\n", dst, src, mem, v, x.LongString())
+                       fmt.Printf("\t\tsplitStore returns %s\n", x.LongString())
                }
+
                return x
        }
 
@@ -286,21 +472,24 @@ func expandCalls(f *Func) {
                                }
                                // "Dereference" of addressed (probably not-SSA-eligible) value becomes Move
                                // TODO this will be more complicated with registers in the picture.
-                               src := a.Args[0]
-                               dst := f.ConstOffPtrSP(src.Type, aux.OffsetOfArg(auxI), sp)
+                               source := a.Args[0]
+                               dst := f.ConstOffPtrSP(source.Type, aux.OffsetOfArg(auxI), sp)
                                if a.Uses == 1 && a.Block == v.Block {
                                        a.reset(OpMove)
                                        a.Pos = pos
                                        a.Type = types.TypeMem
                                        a.Aux = aux.TypeOfArg(auxI)
                                        a.AuxInt = aux.SizeOfArg(auxI)
-                                       a.SetArgs3(dst, src, mem)
+                                       a.SetArgs3(dst, source, mem)
                                        mem = a
                                } else {
-                                       mem = v.Block.NewValue3A(pos, OpMove, types.TypeMem, aux.TypeOfArg(auxI), dst, src, mem)
+                                       mem = v.Block.NewValue3A(pos, OpMove, types.TypeMem, aux.TypeOfArg(auxI), dst, source, mem)
                                        mem.AuxInt = aux.SizeOfArg(auxI)
                                }
                        } else {
+                               if debug {
+                                       fmt.Printf("storeArg %s, %v, %d\n", a.LongString(), aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI))
+                               }
                                mem = storeArg(pos, v.Block, a, aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI), mem)
                        }
                }
@@ -308,6 +497,8 @@ func expandCalls(f *Func) {
                return mem
        }
 
+       // TODO if too slow, whole program iteration can be replaced w/ slices of appropriate values, accumulated in first loop here.
+
        // Step 0: rewrite the calls to convert incoming args to stores.
        for _, b := range f.Blocks {
                for _, v := range b.Values {
@@ -328,15 +519,40 @@ func expandCalls(f *Func) {
                }
        }
 
+       for i, name := range f.Names {
+               t := name.Type
+               if isAlreadyExpandedAggregateType(t) {
+                       for j, v := range f.NamedValues[name] {
+                               if v.Op == OpSelectN {
+                                       ns := namedSelects[v]
+                                       namedSelects[v] = append(ns, namedVal{locIndex: i, valIndex: j})
+                               }
+                       }
+               }
+       }
+
        // Step 1: any stores of aggregates remaining are believed to be sourced from call results.
        // Decompose those stores into a series of smaller stores, adding selection ops as necessary.
        for _, b := range f.Blocks {
                for _, v := range b.Values {
                        if v.Op == OpStore {
                                t := v.Aux.(*types.Type)
-                               if isAlreadyExpandedAggregateType(t) {
-                                       dst, src, mem := v.Args[0], v.Args[1], v.Args[2]
-                                       mem = splitStore(dst, src, mem, v, t, 0, v.Pos)
+                               iAEATt := isAlreadyExpandedAggregateType(t)
+                               if !iAEATt {
+                                       // guarding against store immediate struct into interface data field -- store type is *uint8
+                                       // TODO can this happen recursively?
+                                       tSrc := v.Args[1].Type
+                                       iAEATt = isAlreadyExpandedAggregateType(tSrc)
+                                       if iAEATt {
+                                               t = tSrc
+                                       }
+                               }
+                               if iAEATt {
+                                       if debug {
+                                               fmt.Printf("Splitting store %s\n", v.LongString())
+                                       }
+                                       dst, source, mem := v.Args[0], v.Args[1], v.Args[2]
+                                       mem = splitStore(dst, source, mem, v, t, 0, v.Pos)
                                        v.copyOf(mem)
                                }
                        }
@@ -345,23 +561,32 @@ func expandCalls(f *Func) {
 
        val2Preds := make(map[*Value]int32) // Used to accumulate dependency graph of selection operations for topological ordering.
 
-       // Step 2: accumulate selection operations for rewrite in topological order.
+       // Step 2: transform or accumulate selection operations for rewrite in topological order.
+       //
+       // Aggregate types that have already (in earlier phases) been transformed must be lowered comprehensively to finish
+       // the transformation (user-defined structs and arrays, slices, strings, interfaces, complex, 64-bit integers on 32-bit architectures).
+       //
        // Any select-for-addressing applied to call results can be transformed directly.
-       // TODO this is overkill; with the transformation of aggregate references into series of leaf references, it is only necessary to remember and recurse on the leaves.
        for _, b := range f.Blocks {
                for _, v := range b.Values {
                        // Accumulate chains of selectors for processing in topological order
                        switch v.Op {
-                       case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo:
+                       case OpStructSelect, OpArraySelect,
+                               OpIData, OpITab,
+                               OpStringPtr, OpStringLen,
+                               OpSlicePtr, OpSliceLen, OpSliceCap,
+                               OpComplexReal, OpComplexImag,
+                               OpInt64Hi, OpInt64Lo:
                                w := v.Args[0]
                                switch w.Op {
-                               case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo, OpSelectN:
+                               case OpStructSelect, OpArraySelect, OpSelectN:
                                        val2Preds[w] += 1
                                        if debug {
                                                fmt.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w])
                                        }
                                }
                                fallthrough
+
                        case OpSelectN:
                                if _, ok := val2Preds[v]; !ok {
                                        val2Preds[v] = 0
@@ -369,53 +594,153 @@ func expandCalls(f *Func) {
                                                fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
                                        }
                                }
+
                        case OpSelectNAddr:
                                // Do these directly, there are no chains of selectors.
                                call := v.Args[0]
                                which := v.AuxInt
                                aux := call.Aux.(*AuxCall)
                                pt := v.Type
-                               off := f.ConstOffPtrSP(pt, aux.OffsetOfResult(which), sp)
+                               off := offsetFrom(sp, aux.OffsetOfResult(which), pt)
                                v.copyOf(off)
                        }
                }
        }
 
-       // Compilation must be deterministic
-       var ordered []*Value
-       less := func(i, j int) bool { return ordered[i].ID < ordered[j].ID }
+       // Step 3: Compute topological order of selectors,
+       // then process it in reverse to eliminate duplicates,
+       // then forwards to rewrite selectors.
+       //
+       // All chains of selectors end up in same block as the call.
+       sdom := f.Sdom()
+
+       // Compilation must be deterministic, so sort after extracting first zeroes from map.
+       // Sorting allows dominators-last order within each batch,
+       // so that the backwards scan for duplicates will most often find copies from dominating blocks (it is best-effort).
+       var toProcess []*Value
+       less := func(i, j int) bool {
+               vi, vj := toProcess[i], toProcess[j]
+               bi, bj := vi.Block, vj.Block
+               if bi == bj {
+                       return vi.ID < vj.ID
+               }
+               return sdom.domorder(bi) > sdom.domorder(bj) // reverse the order to put dominators last.
+       }
 
-       // Step 3: Rewrite in topological order.  All chains of selectors end up in same block as the call.
+       // Accumulate order in allOrdered
+       var allOrdered []*Value
+       for v, n := range val2Preds {
+               if n == 0 {
+                       allOrdered = append(allOrdered, v)
+               }
+       }
+       last := 0 // allOrdered[0:last] has been top-sorted and processed
        for len(val2Preds) > 0 {
-               ordered = ordered[:0]
-               for v, n := range val2Preds {
-                       if n == 0 {
-                               ordered = append(ordered, v)
+               toProcess = allOrdered[last:]
+               last = len(allOrdered)
+               sort.SliceStable(toProcess, less)
+               for _, v := range toProcess {
+                       w := v.Args[0]
+                       delete(val2Preds, v)
+                       n, ok := val2Preds[w]
+                       if !ok {
+                               continue
                        }
+                       if n == 1 {
+                               allOrdered = append(allOrdered, w)
+                               delete(val2Preds, w)
+                               continue
+                       }
+                       val2Preds[w] = n - 1
                }
-               sort.Slice(ordered, less)
-               for _, v := range ordered {
-                       for {
-                               w := v.Args[0]
-                               if debug {
-                                       fmt.Printf("About to rewrite %s, args[0]=%s\n", v.LongString(), w.LongString())
-                               }
-                               delete(val2Preds, v)
-                               rewriteSelect(v, v, 0)
-                               v = w
-                               n, ok := val2Preds[v]
-                               if !ok {
-                                       break
-                               }
-                               if n != 1 {
-                                       val2Preds[v] = n - 1
-                                       break
-                               }
-                               // Loop on new v; val2Preds[v] == 1 will be deleted in that iteration, no need to store zero.
+       }
+
+       common := make(map[selKey]*Value)
+       // Rewrite duplicate selectors as copies where possible.
+       for i := len(allOrdered) - 1; i >= 0; i-- {
+               v := allOrdered[i]
+               w := v.Args[0]
+               for w.Op == OpCopy {
+                       w = w.Args[0]
+               }
+               typ := v.Type
+               if typ.IsMemory() {
+                       continue // handled elsewhere, not an indexable result
+               }
+               size := typ.Width
+               offset := int64(0)
+               switch v.Op {
+               case OpStructSelect:
+                       if w.Type.Etype == types.TSTRUCT {
+                               offset = w.Type.FieldOff(int(v.AuxInt))
+                       } else { // Immediate interface data artifact, offset is zero.
+                               f.Fatalf("Expand calls interface data problem, func %s, v=%s, w=%s\n", f.Name, v.LongString(), w.LongString())
                        }
+               case OpArraySelect:
+                       offset = size * v.AuxInt
+               case OpSelectN:
+                       offset = w.Aux.(*AuxCall).OffsetOfResult(v.AuxInt)
+               case OpInt64Hi:
+                       offset = hiOffset
+               case OpInt64Lo:
+                       offset = lowOffset
+               case OpStringLen, OpSliceLen, OpIData:
+                       offset = ptrSize
+               case OpSliceCap:
+                       offset = 2 * ptrSize
+               case OpComplexImag:
+                       offset = size
+               }
+               sk := selKey{from: w, size: size, offset: offset, typ: typ.Etype}
+               dupe := common[sk]
+               if dupe == nil {
+                       common[sk] = v
+               } else if sdom.IsAncestorEq(dupe.Block, v.Block) {
+                       v.copyOf(dupe)
+               } else {
+                       // Because values are processed in dominator order, the old common[sk] will never dominate after a miss is seen.
+                       // Installing the new value might match some future values.
+                       common[sk] = v
                }
        }
 
+       // Indices of entries in f.Names that need to be deleted.
+       var toDelete []namedVal
+
+       // Rewrite selectors.
+       for i, v := range allOrdered {
+               if debug {
+                       b := v.Block
+                       fmt.Printf("allOrdered[%d] = b%d, %s, uses=%d\n", i, b.ID, v.LongString(), v.Uses)
+               }
+               if v.Uses == 0 {
+                       v.reset(OpInvalid)
+                       continue
+               }
+               if v.Op == OpCopy {
+                       continue
+               }
+               locs := rewriteSelect(v, v, 0)
+               // Install new names.
+               if v.Type.IsMemory() {
+                       continue
+               }
+               // Leaf types may have debug locations
+               if !isAlreadyExpandedAggregateType(v.Type) {
+                       for _, l := range locs {
+                               f.NamedValues[l] = append(f.NamedValues[l], v)
+                       }
+                       f.Names = append(f.Names, locs...)
+                       continue
+               }
+               // Not-leaf types that had debug locations need to lose them.
+               if ns, ok := namedSelects[v]; ok {
+                       toDelete = append(toDelete, ns...)
+               }
+       }
+
+       deleteNamedVals(f, toDelete)
+
        // Step 4: rewrite the calls themselves, correcting the type
        for _, b := range f.Blocks {
                for _, v := range b.Values {
index 51665c60e2783b2212ab49c031ccf57a1f3e69ab..b4c3e5cfdfb73a6a3965caff7e7a67ff3f3d5809 100644 (file)
@@ -125,6 +125,10 @@ func (d DummyFrontend) SplitStruct(s LocalSlot, i int) LocalSlot {
 func (d DummyFrontend) SplitArray(s LocalSlot) LocalSlot {
        return LocalSlot{N: s.N, Type: s.Type.Elem(), Off: s.Off}
 }
+
+func (d DummyFrontend) SplitSlot(parent *LocalSlot, suffix string, offset int64, t *types.Type) LocalSlot {
+       return LocalSlot{N: parent.N, Type: t, Off: offset}
+}
 func (DummyFrontend) Line(_ src.XPos) string {
        return "unknown.go:0"
 }
index 4f9e863f9070b2cf01682aaf52cb08c8b3f8e7f6..07607960fa04a8744d03ea12aedd7b958fc9f51b 100644 (file)
@@ -9,7 +9,6 @@
 (Int64Hi (Int64Make hi _)) => hi
 (Int64Lo (Int64Make _ lo)) => lo
 
-
 (Load <t> ptr mem) && is64BitInt(t) && !config.BigEndian && t.IsSigned() =>
        (Int64Make
                (Load <typ.Int32> (OffPtr <typ.Int32Ptr> [4] ptr) mem)
 (Trunc64to32 (Int64Make _ lo)) => lo
 (Trunc64to16 (Int64Make _ lo)) => (Trunc32to16 lo)
 (Trunc64to8 (Int64Make _ lo)) => (Trunc32to8 lo)
+// Most general
+(Trunc64to32 x) => (Int64Lo x)
+(Trunc64to16 x) => (Trunc32to16 (Int64Lo x))
+(Trunc64to8 x) => (Trunc32to8 (Int64Lo x))
 
 (Lsh32x64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const32 [0])
 (Rsh32x64 x (Int64Make (Const32 [c]) _)) && c != 0 => (Signmask x)
 // turn x64 non-constant shifts to x32 shifts
 // if high 32-bit of the shift is nonzero, make a huge shift
 (Lsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-       (Lsh64x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+       (Lsh64x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Rsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-       (Rsh64x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+       (Rsh64x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Rsh64Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-       (Rsh64Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+       (Rsh64Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Lsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-       (Lsh32x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+       (Lsh32x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Rsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-       (Rsh32x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+       (Rsh32x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Rsh32Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-       (Rsh32Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+       (Rsh32Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Lsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-       (Lsh16x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+       (Lsh16x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Rsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-       (Rsh16x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+       (Rsh16x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Rsh16Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-       (Rsh16Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+       (Rsh16Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Lsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-       (Lsh8x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+       (Lsh8x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Rsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-       (Rsh8x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+       (Rsh8x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Rsh8Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-       (Rsh8Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+       (Rsh8Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+
+// Most general
+(Lsh64x64 x y)  => (Lsh64x32  x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh64x64 x y)  => (Rsh64x32  x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh64Ux64 x y) => (Rsh64Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Lsh32x64 x y)  => (Lsh32x32  x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh32x64 x y)  => (Rsh32x32  x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh32Ux64 x y) => (Rsh32Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Lsh16x64 x y)  => (Lsh16x32  x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh16x64 x y)  => (Rsh16x32  x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh16Ux64 x y) => (Rsh16Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Lsh8x64 x y)   => (Lsh8x32   x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh8x64 x y)   => (Rsh8x32   x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh8Ux64 x y)  => (Rsh8Ux32  x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+
+// Clean up constants a little
+(Or32 <typ.UInt32> (Zeromask (Const32 [c])) y) && c == 0 => y
+(Or32 <typ.UInt32> (Zeromask (Const32 [c])) y) && c != 0 => (Const32 <typ.UInt32> [-1])
 
 // 64x left shift
 // result.hi = hi<<s | lo>>(32-s) | lo<<(s-32) // >> is unsigned, large shifts result 0
 // result.lo = lo<<s
-(Lsh64x32 (Int64Make hi lo) s) =>
+(Lsh64x32 x s) =>
        (Int64Make
                (Or32 <typ.UInt32>
                        (Or32 <typ.UInt32>
-                               (Lsh32x32 <typ.UInt32> hi s)
+                               (Lsh32x32 <typ.UInt32> (Int64Hi x) s)
                                (Rsh32Ux32 <typ.UInt32>
-                                       lo
+                                       (Int64Lo x)
                                        (Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s)))
                        (Lsh32x32 <typ.UInt32>
-                               lo
+                               (Int64Lo x)
                                (Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32]))))
-               (Lsh32x32 <typ.UInt32> lo s))
-(Lsh64x16 (Int64Make hi lo) s) =>
+               (Lsh32x32 <typ.UInt32> (Int64Lo x) s))
+(Lsh64x16 x s) =>
        (Int64Make
                (Or32 <typ.UInt32>
                        (Or32 <typ.UInt32>
-                               (Lsh32x16 <typ.UInt32> hi s)
+                               (Lsh32x16 <typ.UInt32> (Int64Hi x) s)
                                (Rsh32Ux16 <typ.UInt32>
-                                       lo
+                                       (Int64Lo x)
                                        (Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s)))
                        (Lsh32x16 <typ.UInt32>
-                               lo
+                               (Int64Lo x)
                                (Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32]))))
-               (Lsh32x16 <typ.UInt32> lo s))
-(Lsh64x8 (Int64Make hi lo) s) =>
+               (Lsh32x16 <typ.UInt32> (Int64Lo x) s))
+(Lsh64x8 x s) =>
        (Int64Make
                (Or32 <typ.UInt32>
                        (Or32 <typ.UInt32>
-                               (Lsh32x8 <typ.UInt32> hi s)
+                               (Lsh32x8 <typ.UInt32> (Int64Hi x) s)
                                (Rsh32Ux8 <typ.UInt32>
-                                       lo
+                                       (Int64Lo x)
                                        (Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s)))
                        (Lsh32x8 <typ.UInt32>
-                               lo
+                               (Int64Lo x)
                                (Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32]))))
-               (Lsh32x8 <typ.UInt32> lo s))
+               (Lsh32x8 <typ.UInt32> (Int64Lo x) s))
 
 // 64x unsigned right shift
 // result.hi = hi>>s
 // result.lo = lo>>s | hi<<(32-s) | hi>>(s-32) // >> is unsigned, large shifts result 0
-(Rsh64Ux32 (Int64Make hi lo) s) =>
+(Rsh64Ux32 x s) =>
        (Int64Make
-               (Rsh32Ux32 <typ.UInt32> hi s)
+               (Rsh32Ux32 <typ.UInt32> (Int64Hi x) s)
                (Or32 <typ.UInt32>
                        (Or32 <typ.UInt32>
-                               (Rsh32Ux32 <typ.UInt32> lo s)
+                               (Rsh32Ux32 <typ.UInt32> (Int64Lo x) s)
                                (Lsh32x32 <typ.UInt32>
-                                       hi
+                                       (Int64Hi x)
                                        (Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s)))
                        (Rsh32Ux32 <typ.UInt32>
-                               hi
+                               (Int64Hi x)
                                (Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32])))))
-(Rsh64Ux16 (Int64Make hi lo) s) =>
+(Rsh64Ux16 x s) =>
        (Int64Make
-               (Rsh32Ux16 <typ.UInt32> hi s)
+               (Rsh32Ux16 <typ.UInt32> (Int64Hi x) s)
                (Or32 <typ.UInt32>
                        (Or32 <typ.UInt32>
-                               (Rsh32Ux16 <typ.UInt32> lo s)
+                               (Rsh32Ux16 <typ.UInt32> (Int64Lo x) s)
                                (Lsh32x16 <typ.UInt32>
-                                       hi
+                                       (Int64Hi x)
                                        (Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s)))
                        (Rsh32Ux16 <typ.UInt32>
-                               hi
+                               (Int64Hi x)
                                (Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32])))))
-(Rsh64Ux8 (Int64Make hi lo) s) =>
+(Rsh64Ux8 x s) =>
        (Int64Make
-               (Rsh32Ux8 <typ.UInt32> hi s)
+               (Rsh32Ux8 <typ.UInt32> (Int64Hi x) s)
                (Or32 <typ.UInt32>
                        (Or32 <typ.UInt32>
-                               (Rsh32Ux8 <typ.UInt32> lo s)
+                               (Rsh32Ux8 <typ.UInt32> (Int64Lo x) s)
                                (Lsh32x8 <typ.UInt32>
-                                       hi
+                                       (Int64Hi x)
                                        (Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s)))
                        (Rsh32Ux8 <typ.UInt32>
-                               hi
+                               (Int64Hi x)
                                (Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32])))))
 
 // 64x signed right shift
 // result.hi = hi>>s
 // result.lo = lo>>s | hi<<(32-s) | (hi>>(s-32))&zeromask(s>>5) // hi>>(s-32) is signed, large shifts result 0/-1
-(Rsh64x32 (Int64Make hi lo) s) =>
+(Rsh64x32 x s) =>
        (Int64Make
-               (Rsh32x32 <typ.UInt32> hi s)
+               (Rsh32x32 <typ.UInt32> (Int64Hi x) s)
                (Or32 <typ.UInt32>
                        (Or32 <typ.UInt32>
-                               (Rsh32Ux32 <typ.UInt32> lo s)
+                               (Rsh32Ux32 <typ.UInt32> (Int64Lo x) s)
                                (Lsh32x32 <typ.UInt32>
-                                       hi
+                                       (Int64Hi x)
                                        (Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s)))
                        (And32 <typ.UInt32>
                                (Rsh32x32 <typ.UInt32>
-                                       hi
+                                       (Int64Hi x)
                                        (Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32])))
                                (Zeromask
                                        (Rsh32Ux32 <typ.UInt32> s (Const32 <typ.UInt32> [5]))))))
-(Rsh64x16 (Int64Make hi lo) s) =>
+(Rsh64x16 x s) =>
        (Int64Make
-               (Rsh32x16 <typ.UInt32> hi s)
+               (Rsh32x16 <typ.UInt32> (Int64Hi x) s)
                (Or32 <typ.UInt32>
                        (Or32 <typ.UInt32>
-                               (Rsh32Ux16 <typ.UInt32> lo s)
+                               (Rsh32Ux16 <typ.UInt32> (Int64Lo x) s)
                                (Lsh32x16 <typ.UInt32>
-                                       hi
+                                       (Int64Hi x)
                                        (Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s)))
                        (And32 <typ.UInt32>
                                (Rsh32x16 <typ.UInt32>
-                                       hi
+                                       (Int64Hi x)
                                        (Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32])))
                                (Zeromask
                                        (ZeroExt16to32
                                                (Rsh16Ux32 <typ.UInt16> s (Const32 <typ.UInt32> [5])))))))
-(Rsh64x8 (Int64Make hi lo) s) =>
+(Rsh64x8 x s) =>
        (Int64Make
-               (Rsh32x8 <typ.UInt32> hi s)
+               (Rsh32x8 <typ.UInt32> (Int64Hi x) s)
                (Or32 <typ.UInt32>
                        (Or32 <typ.UInt32>
-                               (Rsh32Ux8 <typ.UInt32> lo s)
+                               (Rsh32Ux8 <typ.UInt32> (Int64Lo x) s)
                                (Lsh32x8 <typ.UInt32>
-                                       hi
+                                       (Int64Hi x)
                                        (Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s)))
                        (And32 <typ.UInt32>
                                (Rsh32x8 <typ.UInt32>
-                                       hi
+                                       (Int64Hi x)
                                        (Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32])))
                                (Zeromask
                                        (ZeroExt8to32
index 86fbc9901af3aacbde2a2126430bafb21aff199a..8b9753414f4a235f1aa02c18f613ae307d023ca5 100644 (file)
@@ -62,6 +62,8 @@ func rewriteValuedec64(v *Value) bool {
                return rewriteValuedec64_OpNeg64(v)
        case OpNeq64:
                return rewriteValuedec64_OpNeq64(v)
+       case OpOr32:
+               return rewriteValuedec64_OpOr32(v)
        case OpOr64:
                return rewriteValuedec64_OpOr64(v)
        case OpRsh16Ux64:
@@ -728,7 +730,23 @@ func rewriteValuedec64_OpLsh16x64(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
-       return false
+       // match: (Lsh16x64 x y)
+       // result: (Lsh16x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpLsh16x32)
+               v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
+               v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v3.AddArg(y)
+               v0.AddArg2(v1, v3)
+               v.AddArg2(x, v0)
+               return true
+       }
 }
 func rewriteValuedec64_OpLsh32x64(v *Value) bool {
        v_1 := v.Args[1]
@@ -793,83 +811,97 @@ func rewriteValuedec64_OpLsh32x64(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
-       return false
+       // match: (Lsh32x64 x y)
+       // result: (Lsh32x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpLsh32x32)
+               v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
+               v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v3.AddArg(y)
+               v0.AddArg2(v1, v3)
+               v.AddArg2(x, v0)
+               return true
+       }
 }
 func rewriteValuedec64_OpLsh64x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Lsh64x16 (Int64Make hi lo) s)
-       // result: (Int64Make (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Lsh32x16 <typ.UInt32> hi s) (Rsh32Ux16 <typ.UInt32> lo (Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s))) (Lsh32x16 <typ.UInt32> lo (Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32])))) (Lsh32x16 <typ.UInt32> lo s))
+       // match: (Lsh64x16 x s)
+       // result: (Int64Make (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Lsh32x16 <typ.UInt32> (Int64Hi x) s) (Rsh32Ux16 <typ.UInt32> (Int64Lo x) (Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s))) (Lsh32x16 <typ.UInt32> (Int64Lo x) (Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32])))) (Lsh32x16 <typ.UInt32> (Int64Lo x) s))
        for {
-               if v_0.Op != OpInt64Make {
-                       break
-               }
-               lo := v_0.Args[1]
-               hi := v_0.Args[0]
+               x := v_0
                s := v_1
                v.reset(OpInt64Make)
                v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
                v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
                v2 := b.NewValue0(v.Pos, OpLsh32x16, typ.UInt32)
-               v2.AddArg2(hi, s)
-               v3 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32)
-               v4 := b.NewValue0(v.Pos, OpSub16, typ.UInt16)
-               v5 := b.NewValue0(v.Pos, OpConst16, typ.UInt16)
-               v5.AuxInt = int16ToAuxInt(32)
-               v4.AddArg2(v5, s)
-               v3.AddArg2(lo, v4)
-               v1.AddArg2(v2, v3)
-               v6 := b.NewValue0(v.Pos, OpLsh32x16, typ.UInt32)
-               v7 := b.NewValue0(v.Pos, OpSub16, typ.UInt16)
-               v7.AddArg2(s, v5)
-               v6.AddArg2(lo, v7)
-               v0.AddArg2(v1, v6)
+               v3 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v3.AddArg(x)
+               v2.AddArg2(v3, s)
+               v4 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32)
+               v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v5.AddArg(x)
+               v6 := b.NewValue0(v.Pos, OpSub16, typ.UInt16)
+               v7 := b.NewValue0(v.Pos, OpConst16, typ.UInt16)
+               v7.AuxInt = int16ToAuxInt(32)
+               v6.AddArg2(v7, s)
+               v4.AddArg2(v5, v6)
+               v1.AddArg2(v2, v4)
                v8 := b.NewValue0(v.Pos, OpLsh32x16, typ.UInt32)
-               v8.AddArg2(lo, s)
-               v.AddArg2(v0, v8)
+               v9 := b.NewValue0(v.Pos, OpSub16, typ.UInt16)
+               v9.AddArg2(s, v7)
+               v8.AddArg2(v5, v9)
+               v0.AddArg2(v1, v8)
+               v10 := b.NewValue0(v.Pos, OpLsh32x16, typ.UInt32)
+               v10.AddArg2(v5, s)
+               v.AddArg2(v0, v10)
                return true
        }
-       return false
 }
 func rewriteValuedec64_OpLsh64x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Lsh64x32 (Int64Make hi lo) s)
-       // result: (Int64Make (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Lsh32x32 <typ.UInt32> hi s) (Rsh32Ux32 <typ.UInt32> lo (Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s))) (Lsh32x32 <typ.UInt32> lo (Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32])))) (Lsh32x32 <typ.UInt32> lo s))
+       // match: (Lsh64x32 x s)
+       // result: (Int64Make (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Lsh32x32 <typ.UInt32> (Int64Hi x) s) (Rsh32Ux32 <typ.UInt32> (Int64Lo x) (Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s))) (Lsh32x32 <typ.UInt32> (Int64Lo x) (Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32])))) (Lsh32x32 <typ.UInt32> (Int64Lo x) s))
        for {
-               if v_0.Op != OpInt64Make {
-                       break
-               }
-               lo := v_0.Args[1]
-               hi := v_0.Args[0]
+               x := v_0
                s := v_1
                v.reset(OpInt64Make)
                v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
                v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
                v2 := b.NewValue0(v.Pos, OpLsh32x32, typ.UInt32)
-               v2.AddArg2(hi, s)
-               v3 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32)
-               v4 := b.NewValue0(v.Pos, OpSub32, typ.UInt32)
-               v5 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
-               v5.AuxInt = int32ToAuxInt(32)
-               v4.AddArg2(v5, s)
-               v3.AddArg2(lo, v4)
-               v1.AddArg2(v2, v3)
-               v6 := b.NewValue0(v.Pos, OpLsh32x32, typ.UInt32)
-               v7 := b.NewValue0(v.Pos, OpSub32, typ.UInt32)
-               v7.AddArg2(s, v5)
-               v6.AddArg2(lo, v7)
-               v0.AddArg2(v1, v6)
+               v3 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v3.AddArg(x)
+               v2.AddArg2(v3, s)
+               v4 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32)
+               v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v5.AddArg(x)
+               v6 := b.NewValue0(v.Pos, OpSub32, typ.UInt32)
+               v7 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
+               v7.AuxInt = int32ToAuxInt(32)
+               v6.AddArg2(v7, s)
+               v4.AddArg2(v5, v6)
+               v1.AddArg2(v2, v4)
                v8 := b.NewValue0(v.Pos, OpLsh32x32, typ.UInt32)
-               v8.AddArg2(lo, s)
-               v.AddArg2(v0, v8)
+               v9 := b.NewValue0(v.Pos, OpSub32, typ.UInt32)
+               v9.AddArg2(s, v7)
+               v8.AddArg2(v5, v9)
+               v0.AddArg2(v1, v8)
+               v10 := b.NewValue0(v.Pos, OpLsh32x32, typ.UInt32)
+               v10.AddArg2(v5, s)
+               v.AddArg2(v0, v10)
                return true
        }
-       return false
 }
 func rewriteValuedec64_OpLsh64x64(v *Value) bool {
        v_1 := v.Args[1]
@@ -934,45 +966,60 @@ func rewriteValuedec64_OpLsh64x64(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
-       return false
+       // match: (Lsh64x64 x y)
+       // result: (Lsh64x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpLsh64x32)
+               v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
+               v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v3.AddArg(y)
+               v0.AddArg2(v1, v3)
+               v.AddArg2(x, v0)
+               return true
+       }
 }
 func rewriteValuedec64_OpLsh64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Lsh64x8 (Int64Make hi lo) s)
-       // result: (Int64Make (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Lsh32x8 <typ.UInt32> hi s) (Rsh32Ux8 <typ.UInt32> lo (Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s))) (Lsh32x8 <typ.UInt32> lo (Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32])))) (Lsh32x8 <typ.UInt32> lo s))
+       // match: (Lsh64x8 x s)
+       // result: (Int64Make (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Lsh32x8 <typ.UInt32> (Int64Hi x) s) (Rsh32Ux8 <typ.UInt32> (Int64Lo x) (Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s))) (Lsh32x8 <typ.UInt32> (Int64Lo x) (Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32])))) (Lsh32x8 <typ.UInt32> (Int64Lo x) s))
        for {
-               if v_0.Op != OpInt64Make {
-                       break
-               }
-               lo := v_0.Args[1]
-               hi := v_0.Args[0]
+               x := v_0
                s := v_1
                v.reset(OpInt64Make)
                v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
                v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
                v2 := b.NewValue0(v.Pos, OpLsh32x8, typ.UInt32)
-               v2.AddArg2(hi, s)
-               v3 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32)
-               v4 := b.NewValue0(v.Pos, OpSub8, typ.UInt8)
-               v5 := b.NewValue0(v.Pos, OpConst8, typ.UInt8)
-               v5.AuxInt = int8ToAuxInt(32)
-               v4.AddArg2(v5, s)
-               v3.AddArg2(lo, v4)
-               v1.AddArg2(v2, v3)
-               v6 := b.NewValue0(v.Pos, OpLsh32x8, typ.UInt32)
-               v7 := b.NewValue0(v.Pos, OpSub8, typ.UInt8)
-               v7.AddArg2(s, v5)
-               v6.AddArg2(lo, v7)
-               v0.AddArg2(v1, v6)
+               v3 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v3.AddArg(x)
+               v2.AddArg2(v3, s)
+               v4 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32)
+               v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v5.AddArg(x)
+               v6 := b.NewValue0(v.Pos, OpSub8, typ.UInt8)
+               v7 := b.NewValue0(v.Pos, OpConst8, typ.UInt8)
+               v7.AuxInt = int8ToAuxInt(32)
+               v6.AddArg2(v7, s)
+               v4.AddArg2(v5, v6)
+               v1.AddArg2(v2, v4)
                v8 := b.NewValue0(v.Pos, OpLsh32x8, typ.UInt32)
-               v8.AddArg2(lo, s)
-               v.AddArg2(v0, v8)
+               v9 := b.NewValue0(v.Pos, OpSub8, typ.UInt8)
+               v9.AddArg2(s, v7)
+               v8.AddArg2(v5, v9)
+               v0.AddArg2(v1, v8)
+               v10 := b.NewValue0(v.Pos, OpLsh32x8, typ.UInt32)
+               v10.AddArg2(v5, s)
+               v.AddArg2(v0, v10)
                return true
        }
-       return false
 }
 func rewriteValuedec64_OpLsh8x64(v *Value) bool {
        v_1 := v.Args[1]
@@ -1037,7 +1084,23 @@ func rewriteValuedec64_OpLsh8x64(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
-       return false
+       // match: (Lsh8x64 x y)
+       // result: (Lsh8x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpLsh8x32)
+               v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
+               v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v3.AddArg(y)
+               v0.AddArg2(v1, v3)
+               v.AddArg2(x, v0)
+               return true
+       }
 }
 func rewriteValuedec64_OpMul64(v *Value) bool {
        v_1 := v.Args[1]
@@ -1118,6 +1181,64 @@ func rewriteValuedec64_OpNeq64(v *Value) bool {
                return true
        }
 }
+func rewriteValuedec64_OpOr32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Or32 <typ.UInt32> (Zeromask (Const32 [c])) y)
+       // cond: c == 0
+       // result: y
+       for {
+               if v.Type != typ.UInt32 {
+                       break
+               }
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpZeromask {
+                               continue
+                       }
+                       v_0_0 := v_0.Args[0]
+                       if v_0_0.Op != OpConst32 {
+                               continue
+                       }
+                       c := auxIntToInt32(v_0_0.AuxInt)
+                       y := v_1
+                       if !(c == 0) {
+                               continue
+                       }
+                       v.copyOf(y)
+                       return true
+               }
+               break
+       }
+       // match: (Or32 <typ.UInt32> (Zeromask (Const32 [c])) y)
+       // cond: c != 0
+       // result: (Const32 <typ.UInt32> [-1])
+       for {
+               if v.Type != typ.UInt32 {
+                       break
+               }
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       if v_0.Op != OpZeromask {
+                               continue
+                       }
+                       v_0_0 := v_0.Args[0]
+                       if v_0_0.Op != OpConst32 {
+                               continue
+                       }
+                       c := auxIntToInt32(v_0_0.AuxInt)
+                       if !(c != 0) {
+                               continue
+                       }
+                       v.reset(OpConst32)
+                       v.Type = typ.UInt32
+                       v.AuxInt = int32ToAuxInt(-1)
+                       return true
+               }
+               break
+       }
+       return false
+}
 func rewriteValuedec64_OpOr64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -1208,7 +1329,23 @@ func rewriteValuedec64_OpRsh16Ux64(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
-       return false
+       // match: (Rsh16Ux64 x y)
+       // result: (Rsh16Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpRsh16Ux32)
+               v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
+               v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v3.AddArg(y)
+               v0.AddArg2(v1, v3)
+               v.AddArg2(x, v0)
+               return true
+       }
 }
 func rewriteValuedec64_OpRsh16x64(v *Value) bool {
        v_1 := v.Args[1]
@@ -1276,7 +1413,23 @@ func rewriteValuedec64_OpRsh16x64(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
-       return false
+       // match: (Rsh16x64 x y)
+       // result: (Rsh16x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpRsh16x32)
+               v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
+               v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v3.AddArg(y)
+               v0.AddArg2(v1, v3)
+               v.AddArg2(x, v0)
+               return true
+       }
 }
 func rewriteValuedec64_OpRsh32Ux64(v *Value) bool {
        v_1 := v.Args[1]
@@ -1341,7 +1494,23 @@ func rewriteValuedec64_OpRsh32Ux64(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
-       return false
+       // match: (Rsh32Ux64 x y)
+       // result: (Rsh32Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpRsh32Ux32)
+               v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
+               v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v3.AddArg(y)
+               v0.AddArg2(v1, v3)
+               v.AddArg2(x, v0)
+               return true
+       }
 }
 func rewriteValuedec64_OpRsh32x64(v *Value) bool {
        v_1 := v.Args[1]
@@ -1407,83 +1576,97 @@ func rewriteValuedec64_OpRsh32x64(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
-       return false
+       // match: (Rsh32x64 x y)
+       // result: (Rsh32x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpRsh32x32)
+               v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
+               v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v3.AddArg(y)
+               v0.AddArg2(v1, v3)
+               v.AddArg2(x, v0)
+               return true
+       }
 }
 func rewriteValuedec64_OpRsh64Ux16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Rsh64Ux16 (Int64Make hi lo) s)
-       // result: (Int64Make (Rsh32Ux16 <typ.UInt32> hi s) (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux16 <typ.UInt32> lo s) (Lsh32x16 <typ.UInt32> hi (Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s))) (Rsh32Ux16 <typ.UInt32> hi (Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32])))))
+       // match: (Rsh64Ux16 x s)
+       // result: (Int64Make (Rsh32Ux16 <typ.UInt32> (Int64Hi x) s) (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux16 <typ.UInt32> (Int64Lo x) s) (Lsh32x16 <typ.UInt32> (Int64Hi x) (Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s))) (Rsh32Ux16 <typ.UInt32> (Int64Hi x) (Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32])))))
        for {
-               if v_0.Op != OpInt64Make {
-                       break
-               }
-               lo := v_0.Args[1]
-               hi := v_0.Args[0]
+               x := v_0
                s := v_1
                v.reset(OpInt64Make)
                v0 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32)
-               v0.AddArg2(hi, s)
-               v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v1.AddArg(x)
+               v0.AddArg2(v1, s)
                v2 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
-               v3 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32)
-               v3.AddArg2(lo, s)
-               v4 := b.NewValue0(v.Pos, OpLsh32x16, typ.UInt32)
-               v5 := b.NewValue0(v.Pos, OpSub16, typ.UInt16)
-               v6 := b.NewValue0(v.Pos, OpConst16, typ.UInt16)
-               v6.AuxInt = int16ToAuxInt(32)
-               v5.AddArg2(v6, s)
-               v4.AddArg2(hi, v5)
-               v2.AddArg2(v3, v4)
-               v7 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32)
-               v8 := b.NewValue0(v.Pos, OpSub16, typ.UInt16)
-               v8.AddArg2(s, v6)
-               v7.AddArg2(hi, v8)
-               v1.AddArg2(v2, v7)
-               v.AddArg2(v0, v1)
+               v3 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v4 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32)
+               v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v5.AddArg(x)
+               v4.AddArg2(v5, s)
+               v6 := b.NewValue0(v.Pos, OpLsh32x16, typ.UInt32)
+               v7 := b.NewValue0(v.Pos, OpSub16, typ.UInt16)
+               v8 := b.NewValue0(v.Pos, OpConst16, typ.UInt16)
+               v8.AuxInt = int16ToAuxInt(32)
+               v7.AddArg2(v8, s)
+               v6.AddArg2(v1, v7)
+               v3.AddArg2(v4, v6)
+               v9 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32)
+               v10 := b.NewValue0(v.Pos, OpSub16, typ.UInt16)
+               v10.AddArg2(s, v8)
+               v9.AddArg2(v1, v10)
+               v2.AddArg2(v3, v9)
+               v.AddArg2(v0, v2)
                return true
        }
-       return false
 }
 func rewriteValuedec64_OpRsh64Ux32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Rsh64Ux32 (Int64Make hi lo) s)
-       // result: (Int64Make (Rsh32Ux32 <typ.UInt32> hi s) (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux32 <typ.UInt32> lo s) (Lsh32x32 <typ.UInt32> hi (Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s))) (Rsh32Ux32 <typ.UInt32> hi (Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32])))))
+       // match: (Rsh64Ux32 x s)
+       // result: (Int64Make (Rsh32Ux32 <typ.UInt32> (Int64Hi x) s) (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux32 <typ.UInt32> (Int64Lo x) s) (Lsh32x32 <typ.UInt32> (Int64Hi x) (Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s))) (Rsh32Ux32 <typ.UInt32> (Int64Hi x) (Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32])))))
        for {
-               if v_0.Op != OpInt64Make {
-                       break
-               }
-               lo := v_0.Args[1]
-               hi := v_0.Args[0]
+               x := v_0
                s := v_1
                v.reset(OpInt64Make)
                v0 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32)
-               v0.AddArg2(hi, s)
-               v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v1.AddArg(x)
+               v0.AddArg2(v1, s)
                v2 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
-               v3 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32)
-               v3.AddArg2(lo, s)
-               v4 := b.NewValue0(v.Pos, OpLsh32x32, typ.UInt32)
-               v5 := b.NewValue0(v.Pos, OpSub32, typ.UInt32)
-               v6 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
-               v6.AuxInt = int32ToAuxInt(32)
-               v5.AddArg2(v6, s)
-               v4.AddArg2(hi, v5)
-               v2.AddArg2(v3, v4)
-               v7 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32)
-               v8 := b.NewValue0(v.Pos, OpSub32, typ.UInt32)
-               v8.AddArg2(s, v6)
-               v7.AddArg2(hi, v8)
-               v1.AddArg2(v2, v7)
-               v.AddArg2(v0, v1)
+               v3 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v4 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32)
+               v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v5.AddArg(x)
+               v4.AddArg2(v5, s)
+               v6 := b.NewValue0(v.Pos, OpLsh32x32, typ.UInt32)
+               v7 := b.NewValue0(v.Pos, OpSub32, typ.UInt32)
+               v8 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
+               v8.AuxInt = int32ToAuxInt(32)
+               v7.AddArg2(v8, s)
+               v6.AddArg2(v1, v7)
+               v3.AddArg2(v4, v6)
+               v9 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32)
+               v10 := b.NewValue0(v.Pos, OpSub32, typ.UInt32)
+               v10.AddArg2(s, v8)
+               v9.AddArg2(v1, v10)
+               v2.AddArg2(v3, v9)
+               v.AddArg2(v0, v2)
                return true
        }
-       return false
 }
 func rewriteValuedec64_OpRsh64Ux64(v *Value) bool {
        v_1 := v.Args[1]
@@ -1548,139 +1731,152 @@ func rewriteValuedec64_OpRsh64Ux64(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
-       return false
+       // match: (Rsh64Ux64 x y)
+       // result: (Rsh64Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpRsh64Ux32)
+               v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
+               v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v3.AddArg(y)
+               v0.AddArg2(v1, v3)
+               v.AddArg2(x, v0)
+               return true
+       }
 }
 func rewriteValuedec64_OpRsh64Ux8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Rsh64Ux8 (Int64Make hi lo) s)
-       // result: (Int64Make (Rsh32Ux8 <typ.UInt32> hi s) (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux8 <typ.UInt32> lo s) (Lsh32x8 <typ.UInt32> hi (Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s))) (Rsh32Ux8 <typ.UInt32> hi (Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32])))))
+       // match: (Rsh64Ux8 x s)
+       // result: (Int64Make (Rsh32Ux8 <typ.UInt32> (Int64Hi x) s) (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux8 <typ.UInt32> (Int64Lo x) s) (Lsh32x8 <typ.UInt32> (Int64Hi x) (Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s))) (Rsh32Ux8 <typ.UInt32> (Int64Hi x) (Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32])))))
        for {
-               if v_0.Op != OpInt64Make {
-                       break
-               }
-               lo := v_0.Args[1]
-               hi := v_0.Args[0]
+               x := v_0
                s := v_1
                v.reset(OpInt64Make)
                v0 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32)
-               v0.AddArg2(hi, s)
-               v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v1.AddArg(x)
+               v0.AddArg2(v1, s)
                v2 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
-               v3 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32)
-               v3.AddArg2(lo, s)
-               v4 := b.NewValue0(v.Pos, OpLsh32x8, typ.UInt32)
-               v5 := b.NewValue0(v.Pos, OpSub8, typ.UInt8)
-               v6 := b.NewValue0(v.Pos, OpConst8, typ.UInt8)
-               v6.AuxInt = int8ToAuxInt(32)
-               v5.AddArg2(v6, s)
-               v4.AddArg2(hi, v5)
-               v2.AddArg2(v3, v4)
-               v7 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32)
-               v8 := b.NewValue0(v.Pos, OpSub8, typ.UInt8)
-               v8.AddArg2(s, v6)
-               v7.AddArg2(hi, v8)
-               v1.AddArg2(v2, v7)
-               v.AddArg2(v0, v1)
+               v3 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v4 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32)
+               v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v5.AddArg(x)
+               v4.AddArg2(v5, s)
+               v6 := b.NewValue0(v.Pos, OpLsh32x8, typ.UInt32)
+               v7 := b.NewValue0(v.Pos, OpSub8, typ.UInt8)
+               v8 := b.NewValue0(v.Pos, OpConst8, typ.UInt8)
+               v8.AuxInt = int8ToAuxInt(32)
+               v7.AddArg2(v8, s)
+               v6.AddArg2(v1, v7)
+               v3.AddArg2(v4, v6)
+               v9 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32)
+               v10 := b.NewValue0(v.Pos, OpSub8, typ.UInt8)
+               v10.AddArg2(s, v8)
+               v9.AddArg2(v1, v10)
+               v2.AddArg2(v3, v9)
+               v.AddArg2(v0, v2)
                return true
        }
-       return false
 }
 func rewriteValuedec64_OpRsh64x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Rsh64x16 (Int64Make hi lo) s)
-       // result: (Int64Make (Rsh32x16 <typ.UInt32> hi s) (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux16 <typ.UInt32> lo s) (Lsh32x16 <typ.UInt32> hi (Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s))) (And32 <typ.UInt32> (Rsh32x16 <typ.UInt32> hi (Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32]))) (Zeromask (ZeroExt16to32 (Rsh16Ux32 <typ.UInt16> s (Const32 <typ.UInt32> [5])))))))
+       // match: (Rsh64x16 x s)
+       // result: (Int64Make (Rsh32x16 <typ.UInt32> (Int64Hi x) s) (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux16 <typ.UInt32> (Int64Lo x) s) (Lsh32x16 <typ.UInt32> (Int64Hi x) (Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s))) (And32 <typ.UInt32> (Rsh32x16 <typ.UInt32> (Int64Hi x) (Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32]))) (Zeromask (ZeroExt16to32 (Rsh16Ux32 <typ.UInt16> s (Const32 <typ.UInt32> [5])))))))
        for {
-               if v_0.Op != OpInt64Make {
-                       break
-               }
-               lo := v_0.Args[1]
-               hi := v_0.Args[0]
+               x := v_0
                s := v_1
                v.reset(OpInt64Make)
                v0 := b.NewValue0(v.Pos, OpRsh32x16, typ.UInt32)
-               v0.AddArg2(hi, s)
-               v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v1.AddArg(x)
+               v0.AddArg2(v1, s)
                v2 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
-               v3 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32)
-               v3.AddArg2(lo, s)
-               v4 := b.NewValue0(v.Pos, OpLsh32x16, typ.UInt32)
-               v5 := b.NewValue0(v.Pos, OpSub16, typ.UInt16)
-               v6 := b.NewValue0(v.Pos, OpConst16, typ.UInt16)
-               v6.AuxInt = int16ToAuxInt(32)
-               v5.AddArg2(v6, s)
-               v4.AddArg2(hi, v5)
-               v2.AddArg2(v3, v4)
-               v7 := b.NewValue0(v.Pos, OpAnd32, typ.UInt32)
-               v8 := b.NewValue0(v.Pos, OpRsh32x16, typ.UInt32)
-               v9 := b.NewValue0(v.Pos, OpSub16, typ.UInt16)
-               v9.AddArg2(s, v6)
-               v8.AddArg2(hi, v9)
-               v10 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
-               v11 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
-               v12 := b.NewValue0(v.Pos, OpRsh16Ux32, typ.UInt16)
-               v13 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
-               v13.AuxInt = int32ToAuxInt(5)
-               v12.AddArg2(s, v13)
-               v11.AddArg(v12)
-               v10.AddArg(v11)
-               v7.AddArg2(v8, v10)
-               v1.AddArg2(v2, v7)
-               v.AddArg2(v0, v1)
+               v3 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v4 := b.NewValue0(v.Pos, OpRsh32Ux16, typ.UInt32)
+               v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v5.AddArg(x)
+               v4.AddArg2(v5, s)
+               v6 := b.NewValue0(v.Pos, OpLsh32x16, typ.UInt32)
+               v7 := b.NewValue0(v.Pos, OpSub16, typ.UInt16)
+               v8 := b.NewValue0(v.Pos, OpConst16, typ.UInt16)
+               v8.AuxInt = int16ToAuxInt(32)
+               v7.AddArg2(v8, s)
+               v6.AddArg2(v1, v7)
+               v3.AddArg2(v4, v6)
+               v9 := b.NewValue0(v.Pos, OpAnd32, typ.UInt32)
+               v10 := b.NewValue0(v.Pos, OpRsh32x16, typ.UInt32)
+               v11 := b.NewValue0(v.Pos, OpSub16, typ.UInt16)
+               v11.AddArg2(s, v8)
+               v10.AddArg2(v1, v11)
+               v12 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
+               v13 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32)
+               v14 := b.NewValue0(v.Pos, OpRsh16Ux32, typ.UInt16)
+               v15 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
+               v15.AuxInt = int32ToAuxInt(5)
+               v14.AddArg2(s, v15)
+               v13.AddArg(v14)
+               v12.AddArg(v13)
+               v9.AddArg2(v10, v12)
+               v2.AddArg2(v3, v9)
+               v.AddArg2(v0, v2)
                return true
        }
-       return false
 }
 func rewriteValuedec64_OpRsh64x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Rsh64x32 (Int64Make hi lo) s)
-       // result: (Int64Make (Rsh32x32 <typ.UInt32> hi s) (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux32 <typ.UInt32> lo s) (Lsh32x32 <typ.UInt32> hi (Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s))) (And32 <typ.UInt32> (Rsh32x32 <typ.UInt32> hi (Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32]))) (Zeromask (Rsh32Ux32 <typ.UInt32> s (Const32 <typ.UInt32> [5]))))))
+       // match: (Rsh64x32 x s)
+       // result: (Int64Make (Rsh32x32 <typ.UInt32> (Int64Hi x) s) (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux32 <typ.UInt32> (Int64Lo x) s) (Lsh32x32 <typ.UInt32> (Int64Hi x) (Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s))) (And32 <typ.UInt32> (Rsh32x32 <typ.UInt32> (Int64Hi x) (Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32]))) (Zeromask (Rsh32Ux32 <typ.UInt32> s (Const32 <typ.UInt32> [5]))))))
        for {
-               if v_0.Op != OpInt64Make {
-                       break
-               }
-               lo := v_0.Args[1]
-               hi := v_0.Args[0]
+               x := v_0
                s := v_1
                v.reset(OpInt64Make)
                v0 := b.NewValue0(v.Pos, OpRsh32x32, typ.UInt32)
-               v0.AddArg2(hi, s)
-               v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v1.AddArg(x)
+               v0.AddArg2(v1, s)
                v2 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
-               v3 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32)
-               v3.AddArg2(lo, s)
-               v4 := b.NewValue0(v.Pos, OpLsh32x32, typ.UInt32)
-               v5 := b.NewValue0(v.Pos, OpSub32, typ.UInt32)
-               v6 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
-               v6.AuxInt = int32ToAuxInt(32)
-               v5.AddArg2(v6, s)
-               v4.AddArg2(hi, v5)
-               v2.AddArg2(v3, v4)
-               v7 := b.NewValue0(v.Pos, OpAnd32, typ.UInt32)
-               v8 := b.NewValue0(v.Pos, OpRsh32x32, typ.UInt32)
-               v9 := b.NewValue0(v.Pos, OpSub32, typ.UInt32)
-               v9.AddArg2(s, v6)
-               v8.AddArg2(hi, v9)
-               v10 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
-               v11 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32)
-               v12 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
-               v12.AuxInt = int32ToAuxInt(5)
-               v11.AddArg2(s, v12)
-               v10.AddArg(v11)
-               v7.AddArg2(v8, v10)
-               v1.AddArg2(v2, v7)
-               v.AddArg2(v0, v1)
+               v3 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v4 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32)
+               v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v5.AddArg(x)
+               v4.AddArg2(v5, s)
+               v6 := b.NewValue0(v.Pos, OpLsh32x32, typ.UInt32)
+               v7 := b.NewValue0(v.Pos, OpSub32, typ.UInt32)
+               v8 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
+               v8.AuxInt = int32ToAuxInt(32)
+               v7.AddArg2(v8, s)
+               v6.AddArg2(v1, v7)
+               v3.AddArg2(v4, v6)
+               v9 := b.NewValue0(v.Pos, OpAnd32, typ.UInt32)
+               v10 := b.NewValue0(v.Pos, OpRsh32x32, typ.UInt32)
+               v11 := b.NewValue0(v.Pos, OpSub32, typ.UInt32)
+               v11.AddArg2(s, v8)
+               v10.AddArg2(v1, v11)
+               v12 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
+               v13 := b.NewValue0(v.Pos, OpRsh32Ux32, typ.UInt32)
+               v14 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
+               v14.AuxInt = int32ToAuxInt(5)
+               v13.AddArg2(s, v14)
+               v12.AddArg(v13)
+               v9.AddArg2(v10, v12)
+               v2.AddArg2(v3, v9)
+               v.AddArg2(v0, v2)
                return true
        }
-       return false
 }
 func rewriteValuedec64_OpRsh64x64(v *Value) bool {
        v_1 := v.Args[1]
@@ -1750,55 +1946,70 @@ func rewriteValuedec64_OpRsh64x64(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
-       return false
+       // match: (Rsh64x64 x y)
+       // result: (Rsh64x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpRsh64x32)
+               v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
+               v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v3.AddArg(y)
+               v0.AddArg2(v1, v3)
+               v.AddArg2(x, v0)
+               return true
+       }
 }
 func rewriteValuedec64_OpRsh64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Rsh64x8 (Int64Make hi lo) s)
-       // result: (Int64Make (Rsh32x8 <typ.UInt32> hi s) (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux8 <typ.UInt32> lo s) (Lsh32x8 <typ.UInt32> hi (Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s))) (And32 <typ.UInt32> (Rsh32x8 <typ.UInt32> hi (Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32]))) (Zeromask (ZeroExt8to32 (Rsh8Ux32 <typ.UInt8> s (Const32 <typ.UInt32> [5])))))))
+       // match: (Rsh64x8 x s)
+       // result: (Int64Make (Rsh32x8 <typ.UInt32> (Int64Hi x) s) (Or32 <typ.UInt32> (Or32 <typ.UInt32> (Rsh32Ux8 <typ.UInt32> (Int64Lo x) s) (Lsh32x8 <typ.UInt32> (Int64Hi x) (Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s))) (And32 <typ.UInt32> (Rsh32x8 <typ.UInt32> (Int64Hi x) (Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32]))) (Zeromask (ZeroExt8to32 (Rsh8Ux32 <typ.UInt8> s (Const32 <typ.UInt32> [5])))))))
        for {
-               if v_0.Op != OpInt64Make {
-                       break
-               }
-               lo := v_0.Args[1]
-               hi := v_0.Args[0]
+               x := v_0
                s := v_1
                v.reset(OpInt64Make)
                v0 := b.NewValue0(v.Pos, OpRsh32x8, typ.UInt32)
-               v0.AddArg2(hi, s)
-               v1 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v1.AddArg(x)
+               v0.AddArg2(v1, s)
                v2 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
-               v3 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32)
-               v3.AddArg2(lo, s)
-               v4 := b.NewValue0(v.Pos, OpLsh32x8, typ.UInt32)
-               v5 := b.NewValue0(v.Pos, OpSub8, typ.UInt8)
-               v6 := b.NewValue0(v.Pos, OpConst8, typ.UInt8)
-               v6.AuxInt = int8ToAuxInt(32)
-               v5.AddArg2(v6, s)
-               v4.AddArg2(hi, v5)
-               v2.AddArg2(v3, v4)
-               v7 := b.NewValue0(v.Pos, OpAnd32, typ.UInt32)
-               v8 := b.NewValue0(v.Pos, OpRsh32x8, typ.UInt32)
-               v9 := b.NewValue0(v.Pos, OpSub8, typ.UInt8)
-               v9.AddArg2(s, v6)
-               v8.AddArg2(hi, v9)
-               v10 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
-               v11 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
-               v12 := b.NewValue0(v.Pos, OpRsh8Ux32, typ.UInt8)
-               v13 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
-               v13.AuxInt = int32ToAuxInt(5)
-               v12.AddArg2(s, v13)
-               v11.AddArg(v12)
-               v10.AddArg(v11)
-               v7.AddArg2(v8, v10)
-               v1.AddArg2(v2, v7)
-               v.AddArg2(v0, v1)
+               v3 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v4 := b.NewValue0(v.Pos, OpRsh32Ux8, typ.UInt32)
+               v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v5.AddArg(x)
+               v4.AddArg2(v5, s)
+               v6 := b.NewValue0(v.Pos, OpLsh32x8, typ.UInt32)
+               v7 := b.NewValue0(v.Pos, OpSub8, typ.UInt8)
+               v8 := b.NewValue0(v.Pos, OpConst8, typ.UInt8)
+               v8.AuxInt = int8ToAuxInt(32)
+               v7.AddArg2(v8, s)
+               v6.AddArg2(v1, v7)
+               v3.AddArg2(v4, v6)
+               v9 := b.NewValue0(v.Pos, OpAnd32, typ.UInt32)
+               v10 := b.NewValue0(v.Pos, OpRsh32x8, typ.UInt32)
+               v11 := b.NewValue0(v.Pos, OpSub8, typ.UInt8)
+               v11.AddArg2(s, v8)
+               v10.AddArg2(v1, v11)
+               v12 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
+               v13 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
+               v14 := b.NewValue0(v.Pos, OpRsh8Ux32, typ.UInt8)
+               v15 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
+               v15.AuxInt = int32ToAuxInt(5)
+               v14.AddArg2(s, v15)
+               v13.AddArg(v14)
+               v12.AddArg(v13)
+               v9.AddArg2(v10, v12)
+               v2.AddArg2(v3, v9)
+               v.AddArg2(v0, v2)
                return true
        }
-       return false
 }
 func rewriteValuedec64_OpRsh8Ux64(v *Value) bool {
        v_1 := v.Args[1]
@@ -1863,7 +2074,23 @@ func rewriteValuedec64_OpRsh8Ux64(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
-       return false
+       // match: (Rsh8Ux64 x y)
+       // result: (Rsh8Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpRsh8Ux32)
+               v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
+               v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v3.AddArg(y)
+               v0.AddArg2(v1, v3)
+               v.AddArg2(x, v0)
+               return true
+       }
 }
 func rewriteValuedec64_OpRsh8x64(v *Value) bool {
        v_1 := v.Args[1]
@@ -1931,7 +2158,23 @@ func rewriteValuedec64_OpRsh8x64(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
-       return false
+       // match: (Rsh8x64 x y)
+       // result: (Rsh8x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpRsh8x32)
+               v0 := b.NewValue0(v.Pos, OpOr32, typ.UInt32)
+               v1 := b.NewValue0(v.Pos, OpZeromask, typ.UInt32)
+               v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v3.AddArg(y)
+               v0.AddArg2(v1, v3)
+               v.AddArg2(x, v0)
+               return true
+       }
 }
 func rewriteValuedec64_OpSignExt16to64(v *Value) bool {
        v_0 := v.Args[0]
@@ -2071,6 +2314,8 @@ func rewriteValuedec64_OpSub64(v *Value) bool {
 }
 func rewriteValuedec64_OpTrunc64to16(v *Value) bool {
        v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
        // match: (Trunc64to16 (Int64Make _ lo))
        // result: (Trunc32to16 lo)
        for {
@@ -2082,7 +2327,16 @@ func rewriteValuedec64_OpTrunc64to16(v *Value) bool {
                v.AddArg(lo)
                return true
        }
-       return false
+       // match: (Trunc64to16 x)
+       // result: (Trunc32to16 (Int64Lo x))
+       for {
+               x := v_0
+               v.reset(OpTrunc32to16)
+               v0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
 }
 func rewriteValuedec64_OpTrunc64to32(v *Value) bool {
        v_0 := v.Args[0]
@@ -2096,10 +2350,19 @@ func rewriteValuedec64_OpTrunc64to32(v *Value) bool {
                v.copyOf(lo)
                return true
        }
-       return false
+       // match: (Trunc64to32 x)
+       // result: (Int64Lo x)
+       for {
+               x := v_0
+               v.reset(OpInt64Lo)
+               v.AddArg(x)
+               return true
+       }
 }
 func rewriteValuedec64_OpTrunc64to8(v *Value) bool {
        v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
        // match: (Trunc64to8 (Int64Make _ lo))
        // result: (Trunc32to8 lo)
        for {
@@ -2111,7 +2374,16 @@ func rewriteValuedec64_OpTrunc64to8(v *Value) bool {
                v.AddArg(lo)
                return true
        }
-       return false
+       // match: (Trunc64to8 x)
+       // result: (Trunc32to8 (Int64Lo x))
+       for {
+               x := v_0
+               v.reset(OpTrunc32to8)
+               v0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
 }
 func rewriteValuedec64_OpXor64(v *Value) bool {
        v_1 := v.Args[1]