cmd/compile: fix late call expansion for SSA-able aggregate results and arguments
author    David Chase <drchase@google.com>
          Mon, 27 Jul 2020 20:46:35 +0000 (16:46 -0400)
committer David Chase <drchase@google.com>
          Thu, 1 Oct 2020 16:43:20 +0000 (16:43 +0000)
This change incorporates the decision that it should be possible to
run call expansion relatively late in the optimization chain, so that
(1) calls themselves can be exposed to useful optimizations, and
(2) the effect of selectors on aggregates is visible at the rewrite,
    so that assignment of parts into registers is less complicated
    (at least I hope it works that way).

That means that selector chains rooted at SelectN results need to be
processed, and Make* ops feeding into call arguments need to be
processed.

This does, however, require that call expansion run before the
decompose builtin pass.

This doesn't yet handle rewrites of strings, slices, interfaces,
and complex numbers.

Passes run.bash and race.bash
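
For orientation, here is a schematic of the rewrite (illustrative SSA
notation only, not actual compiler output; value numbers and offsets
are invented). For a call fn(p) that takes and returns a small,
SSA-able struct pair struct{ a, b int } on a 64-bit target:

    Before expand calls:
        v5 = StructMake2 <pair> v3 v4
        v6 = StaticLECall <pair,mem> {fn} v5 mem
        v7 = SelectN <pair> [0] v6
        v8 = StructSelect <int> [0] v7

    After expand calls (current memory-based ABI):
        v10 = Store <mem> {int} (OffPtr <*int> [0] SP) v3 mem
        v11 = Store <mem> {int} (OffPtr <*int> [8] SP) v4 v10
        v6  = StaticCall <mem> {fn} v11
        v8  = Load <int> (OffPtr <*int> [16] SP) v6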

Change-Id: I71ff23d3c491043beb30e926949970c4f63ef1a4
Reviewed-on: https://go-review.googlesource.com/c/go/+/245133
Trust: David Chase <drchase@google.com>
Run-TryBot: David Chase <drchase@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ssa/compile.go
src/cmd/compile/internal/ssa/config.go
src/cmd/compile/internal/ssa/expand_calls.go
src/cmd/compile/internal/ssa/writebarrier.go

diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 815ff7f99fb358c5aad6603d3f42cd900ac4757b..aebb40568c4336c2113f97fd96fe862657564370 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -10,7 +10,6 @@ import (
        "html"
        "os"
        "sort"
-       "strings"
 
        "bufio"
        "bytes"
@@ -2560,19 +2559,19 @@ func (s *state) expr(n *Node) *ssa.Value {
                if s.prevCall == nil || s.prevCall.Op != ssa.OpStaticLECall {
                        // Do the old thing
                        addr := s.constOffPtrSP(types.NewPtr(n.Type), n.Xoffset)
-                       return s.load(n.Type, addr)
+                       return s.rawLoad(n.Type, addr)
                }
                which := s.prevCall.Aux.(*ssa.AuxCall).ResultForOffset(n.Xoffset)
                if which == -1 {
                        // Do the old thing // TODO: Panic instead.
                        addr := s.constOffPtrSP(types.NewPtr(n.Type), n.Xoffset)
-                       return s.load(n.Type, addr)
+                       return s.rawLoad(n.Type, addr)
                }
                if canSSAType(n.Type) {
                        return s.newValue1I(ssa.OpSelectN, n.Type, which, s.prevCall)
                } else {
                        addr := s.newValue1I(ssa.OpSelectNAddr, types.NewPtr(n.Type), which, s.prevCall)
-                       return s.load(n.Type, addr)
+                       return s.rawLoad(n.Type, addr)
                }
 
        case ODEREF:
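
The branch above picks between two ways to read a call result:
SSA-able values become OpSelectN and stay in the value graph, while
everything else is read through an OpSelectNAddr-derived pointer. A
small Go example of the distinction (illustrative; which types are
SSA-able is decided by canSSAType, and the cutoffs assumed here are
the usual small-struct and single-element-array limits):

    package p

    type small struct{ x, y int } // few fields: SSA-able, read via OpSelectN

    type big struct{ a [100]int } // multi-element array: not SSA-able,
                                  // read via OpSelectNAddr + Load

    func fs() small { return small{1, 2} }
    func fb() big   { return big{} }

    func g() int {
        s := fs() // s.x becomes StructSelect(SelectN(call)) under late expansion
        b := fb()
        return s.x + b.a[0]
    }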
@@ -4377,7 +4376,7 @@ func (s *state) call(n *Node, k callKind, returnResultAddr bool) *ssa.Value {
        case OCALLFUNC:
                if k == callNormal && fn.Op == ONAME && fn.Class() == PFUNC {
                        sym = fn.Sym
-                       if !returnResultAddr && strings.Contains(sym.Name, "testLateExpansion") {
+                       if !returnResultAddr && ssa.LateCallExpansionEnabledWithin(s.f) {
                                testLateExpansion = true
                        }
                        break
@@ -4394,7 +4393,7 @@ func (s *state) call(n *Node, k callKind, returnResultAddr bool) *ssa.Value {
                }
                if k == callNormal {
                        sym = fn.Sym
-                       if !returnResultAddr && strings.Contains(sym.Name, "testLateExpansion") {
+                       if !returnResultAddr && ssa.LateCallExpansionEnabledWithin(s.f) {
                                testLateExpansion = true
                        }
                        break
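
The s.load -> s.rawLoad change in the ORESULT paths above matters for
the race detector and msan: these loads read the call's results area
on behalf of the compiler, not the user program, so they should not
be instrumented. A paraphrase of the presumed relationship between
the two helpers (a sketch, not the exact gc/ssa.go source):

    // load is the instrumented variant used for source-level loads.
    func (s *state) load(t *types.Type, src *ssa.Value) *ssa.Value {
        s.instrument(t, src, false) // race/msan read hooks, when enabled
        return s.rawLoad(t, src)
    }

    // rawLoad emits a bare OpLoad with no instrumentation.
    func (s *state) rawLoad(t *types.Type, src *ssa.Value) *ssa.Value {
        return s.newValue2(ssa.OpLoad, t, src, s.mem())
    }

This pairs with the writebarrier.go change below, which adds
OpSelectNAddr to IsSanitizerSafeAddr.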
diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go
index 4eed612977e0abaee699895b62d855852fbacb1e..3dec1cd85bc8f377b1dcf245edec4a17e786964c 100644
--- a/src/cmd/compile/internal/ssa/compile.go
+++ b/src/cmd/compile/internal/ssa/compile.go
@@ -431,9 +431,9 @@ var passes = [...]pass{
        {name: "nilcheckelim", fn: nilcheckelim},
        {name: "prove", fn: prove},
        {name: "early fuse", fn: fuseEarly},
+       {name: "expand calls", fn: expandCalls, required: true},
        {name: "decompose builtin", fn: decomposeBuiltIn, required: true},
        {name: "softfloat", fn: softfloat, required: true},
-       {name: "expand calls", fn:expandCalls, required: true},
        {name: "late opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules
        {name: "dead auto elim", fn: elimDeadAutosGeneric},
        {name: "generic deadcode", fn: deadcode, required: true}, // remove dead stores, which otherwise mess up store chain
diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go
index 7f01f8047fb458d59759a4461383c60d455065ba..a73bcf8fcad90f7a25941400df69a7f8595866f2 100644
--- a/src/cmd/compile/internal/ssa/config.go
+++ b/src/cmd/compile/internal/ssa/config.go
@@ -196,6 +196,14 @@ const (
        ClassParamOut                     // return value
 )
 
+const go116lateCallExpansion = true
+
+// LateCallExpansionEnabledWithin returns true if late call expansion should be tested
+// within compilation of a function/method, as selected by GOSSAHASH (defaults to "yes").
+func LateCallExpansionEnabledWithin(f *Func) bool {
+       return go116lateCallExpansion && f.DebugTest // Currently set up for GOSSAHASH bug searches
+}
+
 // NewConfig returns a new configuration object for the given architecture.
 func NewConfig(arch string, types Types, ctxt *obj.Link, optimize bool) *Config {
        c := &Config{arch: arch, Types: types}
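
f.DebugTest is driven by the GOSSAHASH environment variable: with it
unset every function matches (the "defaults to yes" above), while
setting it to a bit pattern restricts the new code path to functions
whose name-hash matches, so a miscompiled function can be isolated by
bisecting the pattern. A simplified, hypothetical sketch of that
style of selection (the real matcher lives in the gc package and
differs in detail):

    package p

    import (
        "fmt"
        "hash/fnv"
        "strings"
    )

    // hashMatch sketches GOSSAHASH-style selection: a function
    // participates when the binary rendering of a hash of its name
    // ends with the pattern. The hash choice and special cases here
    // are assumptions for illustration.
    func hashMatch(pattern, fnName string) bool {
        if pattern == "" || pattern == "y" {
            return true // default: the feature applies everywhere
        }
        h := fnv.New32a()
        h.Write([]byte(fnName))
        bits := fmt.Sprintf("%032b", h.Sum32())
        return strings.HasSuffix(bits, pattern)
    }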
diff --git a/src/cmd/compile/internal/ssa/expand_calls.go b/src/cmd/compile/internal/ssa/expand_calls.go
index 34cff51c00095a4bcc84ef0d0e5879734ecf38ea..8456dbab8d26c0373c15008bb5223fd0f3d9cd87 100644
--- a/src/cmd/compile/internal/ssa/expand_calls.go
+++ b/src/cmd/compile/internal/ssa/expand_calls.go
@@ -4,14 +4,59 @@
 
 package ssa
 
-import "cmd/compile/internal/types"
+import (
+       "cmd/compile/internal/types"
+       "cmd/internal/src"
+       "fmt"
+       "sort"
+)
 
 // expandCalls converts LE (Late Expansion) calls that act like they receive value args into a lower-level form
-// that is more oriented to a platform's ABI.  The SelectN operations that extract results are also rewritten into
-// more appropriate forms.
+// that is more oriented to a platform's ABI.  The SelectN operations that extract results are rewritten into
+// more appropriate forms, and any StructMake or ArrayMake inputs are decomposed until non-struct values are
+// reached (for now, Strings, Slices, Complex, and Interface are not decomposed because they are rewritten in
+// a subsequent phase, but that may need to change for a register ABI in case one of those composite values is
+// split between registers and memory).
+//
+// TODO: when it comes time to use registers, might want to include builtin selectors as well, but currently that happens in lower.
 func expandCalls(f *Func) {
+       if !LateCallExpansionEnabledWithin(f) {
+               return
+       }
        canSSAType := f.fe.CanSSA
+       regSize := f.Config.RegSize
        sp, _ := f.spSb()
+
+       debug := f.pass.debug > 0
+
+       // For 32-bit, need to deal with decomposition of 64-bit integers
+       tUint32 := types.Types[types.TUINT32]
+       tInt32 := types.Types[types.TINT32]
+       var hiOffset, lowOffset int64
+       if f.Config.BigEndian {
+               lowOffset = 4
+       } else {
+               hiOffset = 4
+       }
+       pairTypes := func(et types.EType) (tHi, tLo *types.Type) {
+               tHi = tUint32
+               if et == types.TINT64 {
+                       tHi = tInt32
+               }
+               tLo = tUint32
+               return
+       }
+
+       // isAlreadyExpandedAggregateType returns whether a type is an SSA-able "aggregate" (multiple register) type
+       // that was expanded in an earlier phase (small user-defined arrays and structs, lowered in decomposeUser).
+       // Other aggregate types are expanded in decomposeBuiltin, which comes later.
+       isAlreadyExpandedAggregateType := func(t *types.Type) bool {
+               if !canSSAType(t) {
+                       return false
+               }
+               return t.IsStruct() || t.IsArray() || regSize == 4 && t.Size() > 4 && t.IsInteger()
+       }
+
        // Calls that need lowering have some number of inputs, including a memory input,
        // and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able.
 
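
On a 32-bit target a 64-bit integer is decomposed into two 4-byte
words, and hiOffset/lowOffset record where each half lives;
endianness decides which half sits at the lower address. A standalone
restatement of that arithmetic (runnable sketch mirroring the setup
above):

    package main

    import "fmt"

    // pairOffsets mirrors the hiOffset/lowOffset setup in expandCalls:
    // a 64-bit value occupies two 4-byte slots, and endianness decides
    // which half sits at offset 0.
    func pairOffsets(bigEndian bool) (hiOffset, lowOffset int64) {
        if bigEndian {
            lowOffset = 4 // big-endian: hi word first, lo word at +4
        } else {
            hiOffset = 4 // little-endian: lo word first, hi word at +4
        }
        return
    }

    func main() {
        fmt.Println(pairOffsets(false)) // 4 0
        fmt.Println(pairOffsets(true))  // 0 4
    }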
@@ -21,80 +66,329 @@ func expandCalls(f *Func) {
        // With the current ABI, the outputs need to be converted to loads, which will all use the call's
        // memory output as their input.
 
-       // Step 1: find all references to calls as values and rewrite those.
-       for _, b := range f.Blocks {
-               for _, v := range b.Values {
-                       switch v.Op {
-                       case OpSelectN:
-                               call := v.Args[0]
-                               aux := call.Aux.(*AuxCall)
-                               which := v.AuxInt
-                               t := v.Type
-                               if which == aux.NResults() { // mem is after the results.
-                                       // rewrite v as a Copy of call -- the replacement call will produce a mem.
-                                       v.copyOf(call)
-                               } else {
-                                       pt := types.NewPtr(t)
-                                       if canSSAType(t) {
-                                               off := f.ConstOffPtrSP(pt, aux.OffsetOfResult(which), sp)
-                                               v.reset(OpLoad)
-                                               v.SetArgs2(off, call)
+       // rewriteSelect recursively walks from a leaf selector to a root (OpSelectN) through
+       // a chain of Struct/Array Select operations.  If the chain of selectors does not
+       // end in OpSelectN, it does nothing (this can happen depending on compiler phase ordering).
+       // It emits the code necessary to implement the leaf select operation that leads to the call.
+       // TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
+       var rewriteSelect func(leaf *Value, selector *Value, offset int64)
+       rewriteSelect = func(leaf *Value, selector *Value, offset int64) {
+               switch selector.Op {
+               case OpSelectN:
+                       // TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there.
+                       call := selector.Args[0]
+                       aux := call.Aux.(*AuxCall)
+                       which := selector.AuxInt
+                       if which == aux.NResults() { // mem is after the results.
+                               // rewrite v as a Copy of call -- the replacement call will produce a mem.
+                               leaf.copyOf(call)
+                       } else {
+                               leafType := leaf.Type
+                               pt := types.NewPtr(leafType)
+                               if canSSAType(leafType) {
+                                       off := f.ConstOffPtrSP(pt, offset+aux.OffsetOfResult(which), sp)
+                                       // Any selection right out of the arg area/registers has to be same Block as call, use call as mem input.
+                                       if leaf.Block == call.Block {
+                                               leaf.reset(OpLoad)
+                                               leaf.SetArgs2(off, call)
                                        } else {
-                                               panic("Should not have non-SSA-able OpSelectN")
+                                               w := call.Block.NewValue2(leaf.Pos, OpLoad, leafType, off, call)
+                                               leaf.copyOf(w)
                                        }
+                               } else {
+                                       panic("Should not have non-SSA-able OpSelectN")
                                }
-                               v.Type = t // not right for the mem operand yet, but will be when call is rewritten.
+                       }
+               case OpStructSelect:
+                       w := selector.Args[0]
+                       if w.Type.Etype != types.TSTRUCT {
+                               fmt.Printf("Bad type for w:\nv=%v\nsel=%v\nw=%v\n,f=%s\n", leaf.LongString(), selector.LongString(), w.LongString(), f.Name)
+                       }
+                       rewriteSelect(leaf, w, offset+w.Type.FieldOff(int(selector.AuxInt)))
 
-                       case OpSelectNAddr:
-                               call := v.Args[0]
-                               which := v.AuxInt
-                               aux := call.Aux.(*AuxCall)
-                               pt := v.Type
-                               off := f.ConstOffPtrSP(pt, aux.OffsetOfResult(which), sp)
-                               v.copyOf(off)
+               case OpInt64Hi:
+                       w := selector.Args[0]
+                       rewriteSelect(leaf, w, offset+hiOffset)
+
+               case OpInt64Lo:
+                       w := selector.Args[0]
+                       rewriteSelect(leaf, w, offset+lowOffset)
+
+               case OpArraySelect:
+                       w := selector.Args[0]
+                       rewriteSelect(leaf, w, offset+selector.Type.Size()*selector.AuxInt)
+               default:
+                       // Ignore dead ends; on 32-bit, these can occur running before decompose builtins.
+               }
+       }
+
+       // storeArg converts stores of SSA-able aggregates into a series of stores of smaller types into
+       // individual parameter slots.
+       // TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
+       var storeArg func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value
+       storeArg = func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value {
+               switch a.Op {
+               case OpArrayMake0, OpStructMake0:
+                       return mem
+               case OpStructMake1, OpStructMake2, OpStructMake3, OpStructMake4:
+                       for i := 0; i < t.NumFields(); i++ {
+                               fld := t.Field(i)
+                               mem = storeArg(pos, b, a.Args[i], fld.Type, offset+fld.Offset, mem)
+                       }
+                       return mem
+               case OpArrayMake1:
+                       return storeArg(pos, b, a.Args[0], t.Elem(), offset, mem)
+
+               case OpInt64Make:
+                       tHi, tLo := pairTypes(t.Etype)
+                       mem = storeArg(pos, b, a.Args[0], tHi, offset+hiOffset, mem)
+                       return storeArg(pos, b, a.Args[1], tLo, offset+lowOffset, mem)
+               }
+               dst := f.ConstOffPtrSP(types.NewPtr(t), offset, sp)
+               x := b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, a, mem)
+               if debug {
+                       fmt.Printf("storeArg(%v) returns %s\n", a, x.LongString())
+               }
+               return x
+       }
+
+       // offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP
+       // TODO should also optimize offsets from SB?
+       offsetFrom := func(dst *Value, offset int64, t *types.Type) *Value {
+               pt := types.NewPtr(t)
+               if offset == 0 && dst.Type == pt { // this is not actually likely
+                       return dst
+               }
+               if dst.Op != OpOffPtr {
+                       return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, dst)
+               }
+               // Simplify OpOffPtr
+               from := dst.Args[0]
+               offset += dst.AuxInt
+               if from == sp {
+                       return f.ConstOffPtrSP(pt, offset, sp)
+               }
+               return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, from)
+       }
+
+       // splitStore converts a store of an SSA-able aggregate into a series of smaller stores, emitting
+       // appropriate Struct/Array Select operations (which will soon go dead) to obtain the parts.
+       var splitStore func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value
+       splitStore = func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value {
+               // TODO might be worth commoning up duplicate selectors, but since they go dead, maybe no point.
+               pos := v.Pos.WithNotStmt()
+               switch t.Etype {
+               case types.TINT64, types.TUINT64:
+                       if t.Width == regSize {
+                               break
                        }
+                       tHi, tLo := pairTypes(t.Etype)
+                       sel := src.Block.NewValue1(pos, OpInt64Hi, tHi, src)
+                       mem = splitStore(dst, sel, mem, v, tHi, offset+hiOffset, firstStorePos)
+                       firstStorePos = firstStorePos.WithNotStmt()
+                       sel = src.Block.NewValue1(pos, OpInt64Lo, tLo, src)
+                       return splitStore(dst, sel, mem, v, tLo, offset+lowOffset, firstStorePos)
+
+               case types.TARRAY:
+                       elt := t.Elem()
+                       for i := int64(0); i < t.NumElem(); i++ {
+                               sel := src.Block.NewValue1I(pos, OpArraySelect, elt, i, src)
+                               mem = splitStore(dst, sel, mem, v, elt, offset+i*elt.Width, firstStorePos)
+                               firstStorePos = firstStorePos.WithNotStmt()
+                       }
+                       return mem
+               case types.TSTRUCT:
+                       if src.Op == OpIData && t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize {
+                               // This peculiar test deals with accesses to immediate interface data.
+                               // It works okay because everything is the same size.
+                               // Example code that triggers this can be found in go/constant/value.go, function ToComplex
+                               // v119 (+881) = IData <intVal> v6
+                               // v121 (+882) = StaticLECall <floatVal,mem> {AuxCall{"".itof([intVal,0])[floatVal,8]}} [16] v119 v1
+                               // This corresponds to the generic rewrite rule "(StructSelect [0] (IData x)) => (IData x)"
+                               // Guard against "struct{struct{*foo}}"
+                               for t.Etype == types.TSTRUCT && t.NumFields() == 1 {
+                                       t = t.Field(0).Type
+                               }
+                               if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY {
+                                       f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct in it")
+                               }
+                               break // handle the leaf type.
+                       }
+                       for i := 0; i < t.NumFields(); i++ {
+                               fld := t.Field(i)
+                               sel := src.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), src)
+                               mem = splitStore(dst, sel, mem, v, fld.Type, offset+fld.Offset, firstStorePos)
+                               firstStorePos = firstStorePos.WithNotStmt()
+                       }
+                       return mem
                }
+               // Default, including for aggregates whose single element exactly fills their container
+               // TODO this will be a problem for cast interfaces containing floats when we move to registers.
+               x := v.Block.NewValue3A(firstStorePos, OpStore, types.TypeMem, t, offsetFrom(dst, offset, t), src, mem)
+               if debug {
+                       fmt.Printf("splitStore(%v, %v, %v, %v) returns %s\n", dst, src, mem, v, x.LongString())
+               }
+               return x
        }
 
-       // Step 2: rewrite the calls
+       // Step 0: rewrite the calls to convert incoming args to stores.
        for _, b := range f.Blocks {
                for _, v := range b.Values {
                        switch v.Op {
                        case OpStaticLECall:
                                // Thread the stores on the memory arg
-                               m0 := v.Args[len(v.Args)-1]
+                               m0 := v.MemoryArg()
                                mem := m0
                                pos := v.Pos.WithNotStmt()
                                aux := v.Aux.(*AuxCall)
-                               auxInt := v.AuxInt
                                for i, a := range v.Args {
-                                       if a == m0 {
+                                       if a == m0 { // mem is last.
                                                break
                                        }
                                        if a.Op == OpDereference {
                                                // "Dereference" of addressed (probably not-SSA-eligible) value becomes Move
+                                               // TODO this will be more complicated with registers in the picture.
+                                               if a.MemoryArg() != m0 {
+                                                       f.Fatalf("Op...LECall and OpDereference have mismatched mem, %s and %s", v.LongString(), a.LongString())
+                                               }
                                                src := a.Args[0]
                                                dst := f.ConstOffPtrSP(src.Type, aux.OffsetOfArg(int64(i)), sp)
-                                               a.reset(OpMove)
-                                               a.Pos = pos
-                                               a.Type = types.TypeMem
-                                               a.Aux = aux.TypeOfArg(int64(i))
-                                               a.AuxInt = aux.SizeOfArg(int64(i))
-                                               a.SetArgs3(dst, src, mem)
-                                               mem = a
+                                               if a.Uses == 1 {
+                                                       a.reset(OpMove)
+                                                       a.Pos = pos
+                                                       a.Type = types.TypeMem
+                                                       a.Aux = aux.TypeOfArg(int64(i))
+                                                       a.AuxInt = aux.SizeOfArg(int64(i))
+                                                       a.SetArgs3(dst, src, mem)
+                                                       mem = a
+                                               } else {
+                                                       mem = a.Block.NewValue3A(pos, OpMove, types.TypeMem, aux.TypeOfArg(int64(i)), dst, src, mem)
+                                                       mem.AuxInt = aux.SizeOfArg(int64(i))
+                                               }
                                        } else {
-                                               // Add a new store.
-                                               t := aux.TypeOfArg(int64(i))
-                                               dst := f.ConstOffPtrSP(types.NewPtr(t), aux.OffsetOfArg(int64(i)), sp)
-                                               mem = b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, a, mem)
+                                               mem = storeArg(pos, b, a, aux.TypeOfArg(int64(i)), aux.OffsetOfArg(int64(i)), mem)
                                        }
                                }
-                               v.reset(OpStaticCall)
-                               v.Type = types.TypeMem
-                               v.Aux = aux
-                               v.AuxInt = auxInt
+                               v.resetArgs()
                                v.SetArgs1(mem)
                        }
                }
        }
+
+       // Step 1: any stores of aggregates remaining are believed to be sourced from call results.
+       // Decompose those stores into a series of smaller stores, adding selection ops as necessary.
+       for _, b := range f.Blocks {
+               for _, v := range b.Values {
+                       if v.Op == OpStore {
+                               t := v.Aux.(*types.Type)
+                               if isAlreadyExpandedAggregateType(t) {
+                                       dst, src, mem := v.Args[0], v.Args[1], v.Args[2]
+                                       mem = splitStore(dst, src, mem, v, t, 0, v.Pos)
+                                       v.copyOf(mem)
+                               }
+                       }
+               }
+       }
+
+       val2Preds := make(map[*Value]int32) // Used to accumulate dependency graph of selection operations for topological ordering.
+
+       // Step 2: accumulate selection operations for rewrite in topological order.
+       // Any select-for-addressing applied to call results can be transformed directly.
+       // TODO this is overkill; with the transformation of aggregate references into series of leaf references, it is only necessary to remember and recurse on the leaves.
+       for _, b := range f.Blocks {
+               for _, v := range b.Values {
+                       // Accumulate chains of selectors for processing in topological order
+                       switch v.Op {
+                       case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo:
+                               w := v.Args[0]
+                               switch w.Op {
+                               case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo, OpSelectN:
+                                       val2Preds[w] += 1
+                                       if debug {
+                                               fmt.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w])
+                                       }
+                               }
+                               fallthrough
+                       case OpSelectN:
+                               if _, ok := val2Preds[v]; !ok {
+                                       val2Preds[v] = 0
+                                       if debug {
+                                               fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
+                                       }
+                               }
+                       case OpSelectNAddr:
+                               // Do these directly, there are no chains of selectors.
+                               call := v.Args[0]
+                               which := v.AuxInt
+                               aux := call.Aux.(*AuxCall)
+                               pt := v.Type
+                               off := f.ConstOffPtrSP(pt, aux.OffsetOfResult(which), sp)
+                               v.copyOf(off)
+                       }
+               }
+       }
+
+       // Compilation must be deterministic
+       var ordered []*Value
+       less := func(i, j int) bool { return ordered[i].ID < ordered[j].ID }
+
+       // Step 3: Rewrite in topological order.  All chains of selectors end up in same block as the call.
+       for len(val2Preds) > 0 {
+               ordered = ordered[:0]
+               for v, n := range val2Preds {
+                       if n == 0 {
+                               ordered = append(ordered, v)
+                       }
+               }
+               sort.Slice(ordered, less)
+               for _, v := range ordered {
+                       for {
+                               w := v.Args[0]
+                               if debug {
+                                       fmt.Printf("About to rewrite %s, args[0]=%s\n", v.LongString(), w.LongString())
+                               }
+                               delete(val2Preds, v)
+                               rewriteSelect(v, v, 0)
+                               v = w
+                               n, ok := val2Preds[v]
+                               if !ok {
+                                       break
+                               }
+                               if n != 1 {
+                                       val2Preds[v] = n - 1
+                                       break
+                               }
+                               // Loop on new v; val2Preds[v] == 1 will be deleted in that iteration, no need to store zero.
+                       }
+               }
+       }
+
+       // Step 4: rewrite the calls themselves, correcting the type
+       for _, b := range f.Blocks {
+               for _, v := range b.Values {
+                       switch v.Op {
+                       case OpStaticLECall:
+                               v.Op = OpStaticCall
+                               v.Type = types.TypeMem
+                       }
+               }
+       }
+
+       // Step 5: elide any copies introduced.
+       for _, b := range f.Blocks {
+               for _, v := range b.Values {
+                       for i, a := range v.Args {
+                               if a.Op != OpCopy {
+                                       continue
+                               }
+                               aa := copySource(a)
+                               v.SetArg(i, aa)
+                               for a.Uses == 0 {
+                                       b := a.Args[0]
+                                       a.reset(OpInvalid)
+                                       a = b
+                               }
+                       }
+               }
+       }
 }
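
A note on Steps 2 and 3: selectors chain and can be shared, so they
must be rewritten leaf-first. val2Preds counts, for each selector or
SelectN, how many selectors consume it; values with count zero are
chain leaves, and rewriteSelect walks each leaf back to its OpSelectN
root, accumulating a byte offset and emitting a single Load. Sorting
the ready set by ID keeps compilation deterministic. Schematically
(invented values, 32-bit target):

    v1 = SelectN <T> [0] call          // val2Preds[v1] = 1 (used by v2)
    v2 = StructSelect <int64> [1] v1   // val2Preds[v2] = 2 (used by v3, v4)
    v3 = Int64Hi <int32> v2            // count 0: a leaf, rewritten first
    v4 = Int64Lo <uint32> v2           // count 0: a leaf
    // after Step 3, e.g.:
    // v3 = Load <int32> (OffPtr <*int32> [resultOff+fieldOff+hiOffset] SP) call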
diff --git a/src/cmd/compile/internal/ssa/writebarrier.go b/src/cmd/compile/internal/ssa/writebarrier.go
index df54a45b0f6c627f2afbd351a75dcf3d08e767ac..849c9e8967c60f73a60c2f2d8606c5ba68158f86 100644
--- a/src/cmd/compile/internal/ssa/writebarrier.go
+++ b/src/cmd/compile/internal/ssa/writebarrier.go
@@ -527,7 +527,7 @@ func IsStackAddr(v *Value) bool {
                v = v.Args[0]
        }
        switch v.Op {
-       case OpSP, OpLocalAddr:
+       case OpSP, OpLocalAddr, OpSelectNAddr:
                return true
        }
        return false
@@ -593,7 +593,7 @@ func IsSanitizerSafeAddr(v *Value) bool {
                v = v.Args[0]
        }
        switch v.Op {
-       case OpSP, OpLocalAddr:
+       case OpSP, OpLocalAddr, OpSelectNAddr:
                // Stack addresses are always safe.
                return true
        case OpITab, OpStringPtr, OpGetClosurePtr:
@@ -609,7 +609,7 @@ func IsSanitizerSafeAddr(v *Value) bool {
 // isVolatile reports whether v is a pointer to argument region on stack which
 // will be clobbered by a function call.
 func isVolatile(v *Value) bool {
-       for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy {
+       for v.Op == OpOffPtr || v.Op == OpAddPtr || v.Op == OpPtrIndex || v.Op == OpCopy || v.Op == OpSelectNAddr {
                v = v.Args[0]
        }
        return v.Op == OpSP
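
All three predicates above must learn about OpSelectNAddr because it
is a new way to name a slot in the stack's args/results area: stores
through it need no write barrier (IsStackAddr), accesses through it
need no race/msan instrumentation (IsSanitizerSafeAddr), and the area
it points into is clobbered by the next call, so Moves reading
through it must not be deferred past one (isVolatile). A sketch of Go
code that exercises the volatile case (illustrative; the SSA-ability
limit is an assumption):

    package p

    type big struct{ a, b, c, d, e int } // five fields: assumed not SSA-able

    func mk() big { return big{} }

    var sink big

    func g() {
        // The copy from mk's results area into sink reads through an
        // OpSelectNAddr-derived pointer; isVolatile keeps that Move from
        // being reordered past a later call that would reuse the same
        // stack area.
        sink = mk()
    }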