// more unnecessary nil checks. Would fix test/nilptr3_ssa.go:157.
}
}
-
-// storeOrder orders values with respect to stores. That is,
-// if v transitively depends on store s, v is ordered after s,
-// otherwise v is ordered before s.
-// Specifically, values are ordered like
-// store1
-// NilCheck that depends on store1
-// other values that depends on store1
-// store2
-// NilCheck that depends on store2
-// other values that depends on store2
-// ...
-// The order of non-store and non-NilCheck values are undefined
-// (not necessarily dependency order). This should be cheaper
-// than a full scheduling as done in schedule.go.
-// Note that simple dependency order won't work: there is no
-// dependency between NilChecks and values like IsNonNil.
-// Auxiliary data structures are passed in as arguments, so
-// that they can be allocated in the caller and be reused.
-// This function takes care of reset them.
-func storeOrder(values []*Value, sset *sparseSet, storeNumber []int32) []*Value {
- // find all stores
- var stores []*Value // members of values that are store values
- hasNilCheck := false
- sset.clear() // sset is the set of stores that are used in other values
- for _, v := range values {
- if v.Type.IsMemory() {
- stores = append(stores, v)
- if v.Op == OpInitMem || v.Op == OpPhi {
- continue
- }
- a := v.Args[len(v.Args)-1]
- if v.Op == OpSelect1 {
- a = a.Args[len(a.Args)-1]
- }
- sset.add(a.ID) // record that a is used
- }
- if v.Op == OpNilCheck {
- hasNilCheck = true
- }
- }
- if len(stores) == 0 || !hasNilCheck {
- // there is no store or nilcheck, the order does not matter
- return values
- }
-
- f := stores[0].Block.Func
-
- // find last store, which is the one that is not used by other stores
- var last *Value
- for _, v := range stores {
- if !sset.contains(v.ID) {
- if last != nil {
- f.Fatalf("two stores live simutaneously: %v and %v", v, last)
- }
- last = v
- }
- }
-
- // We assign a store number to each value. Store number is the
- // index of the latest store that this value transitively depends.
- // The i-th store in the current block gets store number 3*i. A nil
- // check that depends on the i-th store gets store number 3*i+1.
- // Other values that depends on the i-th store gets store number 3*i+2.
- // Special case: 0 -- unassigned, 1 or 2 -- the latest store it depends
- // is in the previous block (or no store at all, e.g. value is Const).
- // First we assign the number to all stores by walking back the store chain,
- // then assign the number to other values in DFS order.
- count := make([]int32, 3*(len(stores)+1))
- sset.clear() // reuse sparse set to ensure that a value is pushed to stack only once
- for n, w := len(stores), last; n > 0; n-- {
- storeNumber[w.ID] = int32(3 * n)
- count[3*n]++
- sset.add(w.ID)
- if w.Op == OpInitMem || w.Op == OpPhi {
- if n != 1 {
- f.Fatalf("store order is wrong: there are stores before %v", w)
- }
- break
- }
- if w.Op == OpSelect1 {
- w = w.Args[0]
- }
- w = w.Args[len(w.Args)-1]
- }
- var stack []*Value
- for _, v := range values {
- if sset.contains(v.ID) {
- // in sset means v is a store, or already pushed to stack, or already assigned a store number
- continue
- }
- stack = append(stack, v)
- sset.add(v.ID)
-
- for len(stack) > 0 {
- w := stack[len(stack)-1]
- if storeNumber[w.ID] != 0 {
- stack = stack[:len(stack)-1]
- continue
- }
- if w.Op == OpPhi {
- // Phi value doesn't depend on store in the current block.
- // Do this early to avoid dependency cycle.
- storeNumber[w.ID] = 2
- count[2]++
- stack = stack[:len(stack)-1]
- continue
- }
-
- max := int32(0) // latest store dependency
- argsdone := true
- for _, a := range w.Args {
- if a.Block != w.Block {
- continue
- }
- if !sset.contains(a.ID) {
- stack = append(stack, a)
- sset.add(a.ID)
- argsdone = false
- continue
- }
- if storeNumber[a.ID]/3 > max {
- max = storeNumber[a.ID] / 3
- }
- }
- if !argsdone {
- continue
- }
-
- n := 3*max + 2
- if w.Op == OpNilCheck {
- n = 3*max + 1
- }
- storeNumber[w.ID] = n
- count[n]++
- stack = stack[:len(stack)-1]
- }
- }
-
- // convert count to prefix sum of counts: count'[i] = sum_{j<=i} count[i]
- for i := range count {
- if i == 0 {
- continue
- }
- count[i] += count[i-1]
- }
- if count[len(count)-1] != int32(len(values)) {
- f.Fatalf("storeOrder: value is missing, total count = %d, values = %v", count[len(count)-1], values)
- }
-
- // place values in count-indexed bins, which are in the desired store order
- order := make([]*Value, len(values))
- for _, v := range values {
- s := storeNumber[v.ID]
- order[count[s-1]] = v
- count[s-1]++
- }
-
- return order
-}
f.scheduled = true
}
+
+// storeOrder orders values with respect to stores. That is,
+// if v transitively depends on store s, v is ordered after s,
+// otherwise v is ordered before s.
+// Specifically, values are ordered like
+// store1
+// NilCheck that depends on store1
+// other values that depend on store1
+// store2
+// NilCheck that depends on store2
+// other values that depend on store2
+// ...
+// The order of non-store and non-NilCheck values is undefined
+// (not necessarily dependency order). This should be cheaper
+// than a full scheduling as done above.
+// Note that simple dependency order won't work: there is no
+// dependency between NilChecks and values like IsNonNil.
+// Auxiliary data structures are passed in as arguments, so
+// that they can be allocated in the caller and be reused.
+// This function takes care of resetting them.
+func storeOrder(values []*Value, sset *sparseSet, storeNumber []int32) []*Value {
+ if len(values) == 0 {
+ return values
+ }
+
+ f := values[0].Block.Func
+
+ // find all stores
+ var stores []*Value // members of values that are store values
+ hasNilCheck := false
+ sset.clear() // sset is the set of stores that are used in other values
+ for _, v := range values {
+ if v.Type.IsMemory() {
+ stores = append(stores, v)
+ if v.Op == OpInitMem || v.Op == OpPhi {
+ continue
+ }
+ a := v.Args[len(v.Args)-1]
+ if v.Op == OpSelect1 {
+ a = a.Args[len(a.Args)-1]
+ }
+ sset.add(a.ID) // record that a is used
+ }
+ if v.Op == OpNilCheck {
+ hasNilCheck = true
+ }
+ }
+ if len(stores) == 0 || !hasNilCheck && f.pass.name == "nilcheckelim" {
+ // there is no store, or we are in nilcheckelim and there is no nil check
+ // to order relative to stores; either way the order does not matter
+ return values
+ }
+
+ // find last store, which is the one that is not used by other stores
+ var last *Value
+ for _, v := range stores {
+ if !sset.contains(v.ID) {
+ if last != nil {
+ f.Fatalf("two stores live simultaneously: %v and %v", v, last)
+ }
+ last = v
+ }
+ }
+
+ // We assign a store number to each value. Store number is the
+ // index of the latest store that this value transitively depends on.
+ // The i-th store in the current block gets store number 3*i. A nil
+ // check that depends on the i-th store gets store number 3*i+1.
+ // Other values that depend on the i-th store get store number 3*i+2.
+ // Special cases: 0 -- unassigned, 1 or 2 -- the latest store it depends
+ // on is in the previous block (or there is no store at all, e.g. the value is a Const).
+ // First we assign the number to all stores by walking back the store chain,
+ // then assign the number to other values in DFS order.
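+ // For example, in a block with two stores s1 and s2 (s2 being the
+ // later one), s1 gets store number 3, a NilCheck depending on s1 gets 4,
+ // other values depending on s1 get 5, and s2, its NilChecks, and its
+ // other dependents get 6, 7, and 8 respectively.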
+ count := make([]int32, 3*(len(stores)+1))
+ sset.clear() // reuse sparse set to ensure that a value is pushed to stack only once
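+ // The walk below starts at the last store and follows memory args
+ // backwards, so stores are numbered from the end of the chain to
+ // its head.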
+ for n, w := len(stores), last; n > 0; n-- {
+ storeNumber[w.ID] = int32(3 * n)
+ count[3*n]++
+ sset.add(w.ID)
+ if w.Op == OpInitMem || w.Op == OpPhi {
+ if n != 1 {
+ f.Fatalf("store order is wrong: there are stores before %v", w)
+ }
+ break
+ }
+ if w.Op == OpSelect1 {
+ w = w.Args[0]
+ }
+ w = w.Args[len(w.Args)-1]
+ }
+ var stack []*Value
+ for _, v := range values {
+ if sset.contains(v.ID) {
+ // in sset means v is a store, or already pushed to stack, or already assigned a store number
+ continue
+ }
+ stack = append(stack, v)
+ sset.add(v.ID)
+
+ for len(stack) > 0 {
+ w := stack[len(stack)-1]
+ if storeNumber[w.ID] != 0 {
+ stack = stack[:len(stack)-1]
+ continue
+ }
+ if w.Op == OpPhi {
+ // Phi value doesn't depend on store in the current block.
+ // Do this early to avoid dependency cycle.
+ storeNumber[w.ID] = 2
+ count[2]++
+ stack = stack[:len(stack)-1]
+ continue
+ }
+
+ max := int32(0) // latest store dependency
+ argsdone := true
+ for _, a := range w.Args {
+ if a.Block != w.Block {
+ continue
+ }
+ if !sset.contains(a.ID) {
+ stack = append(stack, a)
+ sset.add(a.ID)
+ argsdone = false
+ continue
+ }
+ if storeNumber[a.ID]/3 > max {
+ max = storeNumber[a.ID] / 3
+ }
+ }
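+ // If any argument was newly pushed, leave w on the stack and
+ // process the arguments first; w is revisited later.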
+ if !argsdone {
+ continue
+ }
+
+ n := 3*max + 2
+ if w.Op == OpNilCheck {
+ n = 3*max + 1
+ }
+ storeNumber[w.ID] = n
+ count[n]++
+ stack = stack[:len(stack)-1]
+ }
+ }
+
+ // convert count to prefix sum of counts: count'[i] = sum_{j<=i} count[j]
+ for i := range count {
+ if i == 0 {
+ continue
+ }
+ count[i] += count[i-1]
+ }
+ if count[len(count)-1] != int32(len(values)) {
+ f.Fatalf("storeOrder: value is missing, total count = %d, values = %v", count[len(count)-1], values)
+ }
+
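+ // count now holds the bin boundaries of a counting sort: values with
+ // store number s land at indices [count[s-1], count[s]).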
+ // place values in count-indexed bins, which are in the desired store order
+ order := make([]*Value, len(values))
+ for _, v := range values {
+ s := storeNumber[v.ID]
+ order[count[s-1]] = v
+ count[s-1]++
+ }
+
+ return order
+}
// and a normal store will be used.
// A sequence of WB stores for many pointer fields of a single type will
// be emitted together, with a single branch.
-//
-// Expanding WB ops introduces new control flows, and we would need to
-// split a block into two if there were values after WB ops, which would
-// require scheduling the values. To avoid this complexity, when building
-// SSA, we make sure that WB ops are always at the end of a block. We do
-// this before fuse as it may merge blocks. It also helps to reduce
-// number of blocks as fuse merges blocks introduced in this phase.
func writebarrier(f *Func) {
- var sb, sp, wbaddr *Value
+ var sb, sp, wbaddr, const0 *Value
var writebarrierptr, typedmemmove, typedmemclr *obj.LSym
- var storeWBs, others []*Value
- var wbs *sparseSet
- for _, b := range f.Blocks { // range loop is safe since the blocks we added contain no WB stores
- valueLoop:
- for i, v := range b.Values {
+ var stores, after []*Value
+ var sset *sparseSet
+ var storeNumber []int32
+
+ for _, b := range f.Blocks { // range loop is safe since the blocks we added contain no stores to expand
+ // rewrite write barriers for stack writes to ordinary Store/Move/Zero ops,
+ // and record the presence of non-stack WB ops.
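+ // (Writes to stack slots never need a write barrier, so e.g. a StoreWB
+ // whose target is a local variable's stack address is lowered to a
+ // plain Store.)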
+ hasStore := false
+ for _, v := range b.Values {
switch v.Op {
case OpStoreWB, OpMoveWB, OpMoveWBVolatile, OpZeroWB:
if IsStackAddr(v.Args[0]) {
switch v.Op {
case OpStoreWB:
v.Op = OpStore
case OpMoveWB, OpMoveWBVolatile:
v.Op = OpMove
v.Aux = nil
case OpZeroWB:
v.Op = OpZero
v.Aux = nil
}
continue
}
+ hasStore = true
+ break
+ }
+ }
+ if !hasStore {
+ continue
+ }
- if wbaddr == nil {
- // initalize global values for write barrier test and calls
- // find SB and SP values in entry block
- initln := f.Entry.Pos
- for _, v := range f.Entry.Values {
- if v.Op == OpSB {
- sb = v
- }
- if v.Op == OpSP {
- sp = v
- }
- }
- if sb == nil {
- sb = f.Entry.NewValue0(initln, OpSB, f.Config.fe.TypeUintptr())
- }
- if sp == nil {
- sp = f.Entry.NewValue0(initln, OpSP, f.Config.fe.TypeUintptr())
- }
- wbsym := &ExternSymbol{Typ: f.Config.fe.TypeBool(), Sym: f.Config.fe.Syslook("writeBarrier")}
- wbaddr = f.Entry.NewValue1A(initln, OpAddr, f.Config.fe.TypeUInt32().PtrTo(), wbsym, sb)
- writebarrierptr = f.Config.fe.Syslook("writebarrierptr")
- typedmemmove = f.Config.fe.Syslook("typedmemmove")
- typedmemclr = f.Config.fe.Syslook("typedmemclr")
-
- wbs = f.newSparseSet(f.NumValues())
- defer f.retSparseSet(wbs)
+ if wbaddr == nil {
+ // lazily initialize global values for write barrier test and calls
+ // find SB and SP values in entry block
+ initpos := f.Entry.Pos
+ for _, v := range f.Entry.Values {
+ if v.Op == OpSB {
+ sb = v
+ }
+ if v.Op == OpSP {
+ sp = v
}
+ if sb != nil && sp != nil {
+ break
+ }
+ }
+ if sb == nil {
+ sb = f.Entry.NewValue0(initpos, OpSB, f.Config.fe.TypeUintptr())
+ }
+ if sp == nil {
+ sp = f.Entry.NewValue0(initpos, OpSP, f.Config.fe.TypeUintptr())
+ }
+ wbsym := &ExternSymbol{Typ: f.Config.fe.TypeBool(), Sym: f.Config.fe.Syslook("writeBarrier")}
+ wbaddr = f.Entry.NewValue1A(initpos, OpAddr, f.Config.fe.TypeUInt32().PtrTo(), wbsym, sb)
+ writebarrierptr = f.Config.fe.Syslook("writebarrierptr")
+ typedmemmove = f.Config.fe.Syslook("typedmemmove")
+ typedmemclr = f.Config.fe.Syslook("typedmemclr")
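+ // const0 is materialized once in the entry block and shared by
+ // every write barrier test expanded below.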
+ const0 = f.ConstInt32(initpos, f.Config.fe.TypeUInt32(), 0)
- pos := v.Pos
+ // allocate auxiliary data structures for computing store order
+ sset = f.newSparseSet(f.NumValues())
+ defer f.retSparseSet(sset)
+ storeNumber = make([]int32, f.NumValues())
+ }
- // there may be a sequence of WB stores in the current block. find them.
- storeWBs = storeWBs[:0]
- others = others[:0]
- wbs.clear()
- for _, w := range b.Values[i:] {
- if w.Op == OpStoreWB || w.Op == OpMoveWB || w.Op == OpMoveWBVolatile || w.Op == OpZeroWB {
- storeWBs = append(storeWBs, w)
- wbs.add(w.ID)
- } else {
- others = append(others, w)
- }
- }
+ // order values in store order
+ b.Values = storeOrder(b.Values, sset, storeNumber)
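+ // After storeOrder, every value that transitively depends on a WB
+ // store comes after that store, so the tail of the block from the
+ // last WB store sequence onward can be split off safely.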
- // make sure that no value in this block depends on WB stores
- for _, w := range b.Values {
- if w.Op == OpStoreWB || w.Op == OpMoveWB || w.Op == OpMoveWBVolatile || w.Op == OpZeroWB {
- continue
- }
- for _, a := range w.Args {
- if wbs.contains(a.ID) {
- f.Fatalf("value %v depends on WB store %v in the same block %v", w, a, b)
- }
- }
+ again:
+ // find the start and end of the last contiguous WB store sequence.
+ // a branch will be inserted there. values after it will be moved
+ // to a new block.
+ var last *Value
+ var start, end int
+ values := b.Values
+ for i := len(values) - 1; i >= 0; i-- {
+ w := values[i]
+ if w.Op == OpStoreWB || w.Op == OpMoveWB || w.Op == OpMoveWBVolatile || w.Op == OpZeroWB {
+ if last == nil {
+ last = w
+ end = i + 1
}
-
- // find the memory before the WB stores
- // this memory is not a WB store but it is used in a WB store.
- var mem *Value
- for _, w := range storeWBs {
- a := w.Args[len(w.Args)-1]
- if wbs.contains(a.ID) {
- continue
- }
- if mem != nil {
- b.Fatalf("two stores live simultaneously: %s, %s", mem, a)
- }
- mem = a
+ } else {
+ if last != nil {
+ start = i + 1
+ break
}
+ }
+ }
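+ // For example, if b.Values in store order is [v1 WB1 WB2 v2 v3],
+ // the loop above yields start=1 and end=3, so stores becomes
+ // [WB1 WB2] and after becomes [v2 v3] below.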
+ stores = append(stores[:0], b.Values[start:end]...) // copy to avoid aliasing
+ after = append(after[:0], b.Values[end:]...)
+ b.Values = b.Values[:start]
- b.Values = append(b.Values[:i], others...) // move WB ops out of this block
+ // find the memory before the WB stores
+ mem := stores[0].Args[len(stores[0].Args)-1]
+ pos := stores[0].Pos
+ bThen := f.NewBlock(BlockPlain)
+ bElse := f.NewBlock(BlockPlain)
+ bEnd := f.NewBlock(b.Kind)
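+ // b itself becomes the write barrier test. It and the two branch
+ // blocks take the position of the stores; bEnd keeps b's original position.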
+ bThen.Pos = pos
+ bElse.Pos = pos
+ bEnd.Pos = b.Pos
+ b.Pos = pos
- bThen := f.NewBlock(BlockPlain)
- bElse := f.NewBlock(BlockPlain)
- bEnd := f.NewBlock(b.Kind)
- bThen.Pos = pos
- bElse.Pos = pos
- bEnd.Pos = pos
+ // set up control flow for end block
+ bEnd.SetControl(b.Control)
+ bEnd.Likely = b.Likely
+ for _, e := range b.Succs {
+ bEnd.Succs = append(bEnd.Succs, e)
+ e.b.Preds[e.i].b = bEnd
+ }
- // set up control flow for end block
- bEnd.SetControl(b.Control)
- bEnd.Likely = b.Likely
- for _, e := range b.Succs {
- bEnd.Succs = append(bEnd.Succs, e)
- e.b.Preds[e.i].b = bEnd
- }
+ // set up control flow for write barrier test
+ // load word, test word, avoiding partial register write from load byte.
+ flag := b.NewValue2(pos, OpLoad, f.Config.fe.TypeUInt32(), wbaddr, mem)
+ flag = b.NewValue2(pos, OpNeq32, f.Config.fe.TypeBool(), flag, const0)
+ b.Kind = BlockIf
+ b.SetControl(flag)
+ b.Likely = BranchUnlikely
+ b.Succs = b.Succs[:0]
+ b.AddEdgeTo(bThen)
+ b.AddEdgeTo(bElse)
+ bThen.AddEdgeTo(bEnd)
+ bElse.AddEdgeTo(bEnd)
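+ // The resulting control flow is:
+ //   b:     if runtime.writeBarrier != 0 goto bThen else bElse
+ //   bThen: write barrier calls; goto bEnd
+ //   bElse: plain stores; goto bEnd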
- // set up control flow for write barrier test
- // load word, test word, avoiding partial register write from load byte.
- flag := b.NewValue2(pos, OpLoad, f.Config.fe.TypeUInt32(), wbaddr, mem)
- const0 := f.ConstInt32(pos, f.Config.fe.TypeUInt32(), 0)
- flag = b.NewValue2(pos, OpNeq32, f.Config.fe.TypeBool(), flag, const0)
- b.Kind = BlockIf
- b.SetControl(flag)
- b.Likely = BranchUnlikely
- b.Succs = b.Succs[:0]
- b.AddEdgeTo(bThen)
- b.AddEdgeTo(bElse)
- bThen.AddEdgeTo(bEnd)
- bElse.AddEdgeTo(bEnd)
+ // for each write barrier store, append write barrier version to bThen
+ // and simple store version to bElse
+ memThen := mem
+ memElse := mem
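+ // memThen and memElse thread the memory state through the two branches
+ // independently; they are merged by a memory Phi in bEnd below.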
+ for _, w := range stores {
+ var val *Value
+ ptr := w.Args[0]
+ siz := w.AuxInt
+ typ := w.Aux // only non-nil for MoveWB, MoveWBVolatile, ZeroWB
+ pos = w.Pos
- memThen := mem
- memElse := mem
- for _, w := range storeWBs {
- var val *Value
- ptr := w.Args[0]
- siz := w.AuxInt
- typ := w.Aux // only non-nil for MoveWB, MoveWBVolatile, ZeroWB
+ var op Op
+ var fn *obj.LSym
+ switch w.Op {
+ case OpStoreWB:
+ op = OpStore
+ fn = writebarrierptr
+ val = w.Args[1]
+ case OpMoveWB, OpMoveWBVolatile:
+ op = OpMove
+ fn = typedmemmove
+ val = w.Args[1]
+ case OpZeroWB:
+ op = OpZero
+ fn = typedmemclr
+ }
- var op Op
- var fn *obj.LSym
- switch w.Op {
- case OpStoreWB:
- op = OpStore
- fn = writebarrierptr
- val = w.Args[1]
- case OpMoveWB, OpMoveWBVolatile:
- op = OpMove
- fn = typedmemmove
- val = w.Args[1]
- case OpZeroWB:
- op = OpZero
- fn = typedmemclr
- }
+ // then block: emit write barrier call
+ memThen = wbcall(pos, bThen, fn, typ, ptr, val, memThen, sp, sb, w.Op == OpMoveWBVolatile)
- // then block: emit write barrier call
- memThen = wbcall(pos, bThen, fn, typ, ptr, val, memThen, sp, sb, w.Op == OpMoveWBVolatile)
+ // else block: normal store
+ if op == OpZero {
+ memElse = bElse.NewValue2I(pos, op, TypeMem, siz, ptr, memElse)
+ } else {
+ memElse = bElse.NewValue3I(pos, op, TypeMem, siz, ptr, val, memElse)
+ }
- // else block: normal store
- if op == OpZero {
- memElse = bElse.NewValue2I(pos, op, TypeMem, siz, ptr, memElse)
- } else {
- memElse = bElse.NewValue3I(pos, op, TypeMem, siz, ptr, val, memElse)
- }
- }
+ if f.Config.fe.Debug_wb() {
+ f.Config.Warnl(pos, "write barrier")
+ }
+ }
- // merge memory
- // Splice memory Phi into the last memory of the original sequence,
- // which may be used in subsequent blocks. Other memories in the
- // sequence must be dead after this block since there can be only
- // one memory live.
- last := storeWBs[0]
- if len(storeWBs) > 1 {
- // find the last store
- last = nil
- wbs.clear() // we reuse wbs to record WB stores that is used in another WB store
- for _, w := range storeWBs {
- wbs.add(w.Args[len(w.Args)-1].ID)
- }
- for _, w := range storeWBs {
- if wbs.contains(w.ID) {
- continue
- }
- if last != nil {
- b.Fatalf("two stores live simultaneously: %s, %s", last, w)
- }
- last = w
- }
- }
- bEnd.Values = append(bEnd.Values, last)
- last.Block = bEnd
- last.reset(OpPhi)
- last.Type = TypeMem
- last.AddArg(memThen)
- last.AddArg(memElse)
- for _, w := range storeWBs {
- if w != last {
- w.resetArgs()
- }
- }
- for _, w := range storeWBs {
- if w != last {
- f.freeValue(w)
- }
- }
+ // merge memory
+ // Splice memory Phi into the last memory of the original sequence,
+ // which may be used in subsequent blocks. Other memories in the
+ // sequence must be dead after this block since there can be only
+ // one memory live.
+ bEnd.Values = append(bEnd.Values, last)
+ last.Block = bEnd
+ last.reset(OpPhi)
+ last.Type = TypeMem
+ last.AddArg(memThen)
+ last.AddArg(memElse)
+ for _, w := range stores {
+ if w != last {
+ w.resetArgs()
+ }
+ }
+ for _, w := range stores {
+ if w != last {
+ f.freeValue(w)
+ }
+ }
- if f.Config.fe.Debug_wb() {
- f.Config.Warnl(pos, "write barrier")
- }
+ // put values after the store sequence into the end block
+ bEnd.Values = append(bEnd.Values, after...)
+ for _, w := range after {
+ w.Block = bEnd
+ }
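+ // These values follow the WB stores in store order and may use their
+ // memory, so they belong after the memory Phi in bEnd.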
- break valueLoop
+ // if we have more stores in this block, do this block again
+ for _, w := range b.Values {
+ if w.Op == OpStoreWB || w.Op == OpMoveWB || w.Op == OpMoveWBVolatile || w.Op == OpZeroWB {
+ goto again
}
}
}