From c4ef597c47a00c3f78916425153aefa171a3b12f Mon Sep 17 00:00:00 2001
From: Cherry Zhang
Date: Wed, 1 Feb 2017 14:27:40 -0500
Subject: [PATCH] cmd/compile: redo writebarrier pass

SSA's writebarrier pass requires that WB store ops always be at the
end of a block. If we move write barrier insertion into SSA and emit
normal Store ops when building SSA, this requirement becomes
impractical -- it would create too many blocks for all the Store ops.

Redo SSA's writebarrier pass: explicitly order values in store order,
so that the pass no longer needs this requirement.

Updates #17583.
Fixes #19067.

Change-Id: I66e817e526affb7e13517d4245905300a90b7170
Reviewed-on: https://go-review.googlesource.com/36834
Run-TryBot: Cherry Zhang
TryBot-Result: Gobot Gobot
Reviewed-by: David Chase
---
 src/cmd/compile/internal/gc/ssa.go           |  16 -
 src/cmd/compile/internal/ssa/nilcheck.go     | 160 ---------
 src/cmd/compile/internal/ssa/schedule.go     | 164 +++++++++
 src/cmd/compile/internal/ssa/writebarrier.go | 344 +++++++++----------
 4 files changed, 332 insertions(+), 352 deletions(-)

diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index b9b3b80b52..78a1f6b48c 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -3438,14 +3438,6 @@ func (s *state) insertWBmove(t *Type, left, right *ssa.Value, rightIsVolatile bo
 	}
 	val.Aux = &ssa.ExternSymbol{Typ: Types[TUINTPTR], Sym: Linksym(typenamesym(t))}
 	s.vars[&memVar] = val
-
-	// WB ops will be expanded to branches at writebarrier phase.
-	// To make it easy, we put WB ops at the end of a block, so
-	// that it does not need to split a block into two parts when
-	// expanding WB ops.
-	b := s.f.NewBlock(ssa.BlockPlain)
-	s.endBlock().AddEdgeTo(b)
-	s.startBlock(b)
 }

 // insertWBstore inserts the assignment *left = right including a write barrier.
@@ -3466,14 +3458,6 @@ func (s *state) insertWBstore(t *Type, left, right *ssa.Value, skip skipMask) {
 	}
 	s.storeTypeScalars(t, left, right, skip)
 	s.storeTypePtrsWB(t, left, right)
-
-	// WB ops will be expanded to branches at writebarrier phase.
-	// To make it easy, we put WB ops at the end of a block, so
-	// that it does not need to split a block into two parts when
-	// expanding WB ops.
-	b := s.f.NewBlock(ssa.BlockPlain)
-	s.endBlock().AddEdgeTo(b)
-	s.startBlock(b)
 }

 // do *left = right for all scalar (non-pointer) parts of t.
diff --git a/src/cmd/compile/internal/ssa/nilcheck.go b/src/cmd/compile/internal/ssa/nilcheck.go
index aa6424fe41..ea6523d24c 100644
--- a/src/cmd/compile/internal/ssa/nilcheck.go
+++ b/src/cmd/compile/internal/ssa/nilcheck.go
@@ -227,163 +227,3 @@ func nilcheckelim2(f *Func) {
 	// more unnecessary nil checks. Would fix test/nilptr3_ssa.go:157.
 	}
 }
-
-// storeOrder orders values with respect to stores. That is,
-// if v transitively depends on store s, v is ordered after s,
-// otherwise v is ordered before s.
-// Specifically, values are ordered like
-// store1
-// NilCheck that depends on store1
-// other values that depends on store1
-// store2
-// NilCheck that depends on store2
-// other values that depends on store2
-// ...
-// The order of non-store and non-NilCheck values are undefined
-// (not necessarily dependency order). This should be cheaper
-// than a full scheduling as done in schedule.go.
-// Note that simple dependency order won't work: there is no
-// dependency between NilChecks and values like IsNonNil.
-// Auxiliary data structures are passed in as arguments, so
-// that they can be allocated in the caller and be reused.
-// This function takes care of reset them.
-func storeOrder(values []*Value, sset *sparseSet, storeNumber []int32) []*Value {
-	// find all stores
-	var stores []*Value // members of values that are store values
-	hasNilCheck := false
-	sset.clear() // sset is the set of stores that are used in other values
-	for _, v := range values {
-		if v.Type.IsMemory() {
-			stores = append(stores, v)
-			if v.Op == OpInitMem || v.Op == OpPhi {
-				continue
-			}
-			a := v.Args[len(v.Args)-1]
-			if v.Op == OpSelect1 {
-				a = a.Args[len(a.Args)-1]
-			}
-			sset.add(a.ID) // record that a is used
-		}
-		if v.Op == OpNilCheck {
-			hasNilCheck = true
-		}
-	}
-	if len(stores) == 0 || !hasNilCheck {
-		// there is no store or nilcheck, the order does not matter
-		return values
-	}
-
-	f := stores[0].Block.Func
-
-	// find last store, which is the one that is not used by other stores
-	var last *Value
-	for _, v := range stores {
-		if !sset.contains(v.ID) {
-			if last != nil {
-				f.Fatalf("two stores live simutaneously: %v and %v", v, last)
-			}
-			last = v
-		}
-	}
-
-	// We assign a store number to each value. Store number is the
-	// index of the latest store that this value transitively depends.
-	// The i-th store in the current block gets store number 3*i. A nil
-	// check that depends on the i-th store gets store number 3*i+1.
-	// Other values that depends on the i-th store gets store number 3*i+2.
-	// Special case: 0 -- unassigned, 1 or 2 -- the latest store it depends
-	// is in the previous block (or no store at all, e.g. value is Const).
-	// First we assign the number to all stores by walking back the store chain,
-	// then assign the number to other values in DFS order.
-	count := make([]int32, 3*(len(stores)+1))
-	sset.clear() // reuse sparse set to ensure that a value is pushed to stack only once
-	for n, w := len(stores), last; n > 0; n-- {
-		storeNumber[w.ID] = int32(3 * n)
-		count[3*n]++
-		sset.add(w.ID)
-		if w.Op == OpInitMem || w.Op == OpPhi {
-			if n != 1 {
-				f.Fatalf("store order is wrong: there are stores before %v", w)
-			}
-			break
-		}
-		if w.Op == OpSelect1 {
-			w = w.Args[0]
-		}
-		w = w.Args[len(w.Args)-1]
-	}
-	var stack []*Value
-	for _, v := range values {
-		if sset.contains(v.ID) {
-			// in sset means v is a store, or already pushed to stack, or already assigned a store number
-			continue
-		}
-		stack = append(stack, v)
-		sset.add(v.ID)
-
-		for len(stack) > 0 {
-			w := stack[len(stack)-1]
-			if storeNumber[w.ID] != 0 {
-				stack = stack[:len(stack)-1]
-				continue
-			}
-			if w.Op == OpPhi {
-				// Phi value doesn't depend on store in the current block.
-				// Do this early to avoid dependency cycle.
-				storeNumber[w.ID] = 2
-				count[2]++
-				stack = stack[:len(stack)-1]
-				continue
-			}
-
-			max := int32(0) // latest store dependency
-			argsdone := true
-			for _, a := range w.Args {
-				if a.Block != w.Block {
-					continue
-				}
-				if !sset.contains(a.ID) {
-					stack = append(stack, a)
-					sset.add(a.ID)
-					argsdone = false
-					continue
-				}
-				if storeNumber[a.ID]/3 > max {
-					max = storeNumber[a.ID] / 3
-				}
-			}
-			if !argsdone {
-				continue
-			}
-
-			n := 3*max + 2
-			if w.Op == OpNilCheck {
-				n = 3*max + 1
-			}
-			storeNumber[w.ID] = n
-			count[n]++
-			stack = stack[:len(stack)-1]
-		}
-	}
-
-	// convert count to prefix sum of counts: count'[i] = sum_{j<=i} count[i]
-	for i := range count {
-		if i == 0 {
-			continue
-		}
-		count[i] += count[i-1]
-	}
-	if count[len(count)-1] != int32(len(values)) {
-		f.Fatalf("storeOrder: value is missing, total count = %d, values = %v", count[len(count)-1], values)
-	}
-
-	// place values in count-indexed bins, which are in the desired store order
-	order := make([]*Value, len(values))
-	for _, v := range values {
-		s := storeNumber[v.ID]
-		order[count[s-1]] = v
-		count[s-1]++
-	}
-
-	return order
-}
diff --git a/src/cmd/compile/internal/ssa/schedule.go b/src/cmd/compile/internal/ssa/schedule.go
index bd4d3299f2..35edd77b8d 100644
--- a/src/cmd/compile/internal/ssa/schedule.go
+++ b/src/cmd/compile/internal/ssa/schedule.go
@@ -277,3 +277,167 @@ func schedule(f *Func) {

 	f.scheduled = true
 }
+
+// storeOrder orders values with respect to stores. That is,
+// if v transitively depends on store s, v is ordered after s,
+// otherwise v is ordered before s.
+// Specifically, values are ordered like
+// store1
+// NilCheck that depends on store1
+// other values that depend on store1
+// store2
+// NilCheck that depends on store2
+// other values that depend on store2
+// ...
+// The order of non-store and non-NilCheck values is undefined
+// (not necessarily dependency order). This should be cheaper
+// than a full scheduling as done above.
+// Note that simple dependency order won't work: there is no
+// dependency between NilChecks and values like IsNonNil.
+// Auxiliary data structures are passed in as arguments, so
+// that they can be allocated in the caller and be reused.
+// This function takes care of resetting them.
+func storeOrder(values []*Value, sset *sparseSet, storeNumber []int32) []*Value {
+	if len(values) == 0 {
+		return values
+	}
+
+	f := values[0].Block.Func
+
+	// find all stores
+	var stores []*Value // members of values that are store values
+	hasNilCheck := false
+	sset.clear() // sset is the set of stores that are used in other values
+	for _, v := range values {
+		if v.Type.IsMemory() {
+			stores = append(stores, v)
+			if v.Op == OpInitMem || v.Op == OpPhi {
+				continue
+			}
+			a := v.Args[len(v.Args)-1]
+			if v.Op == OpSelect1 {
+				a = a.Args[len(a.Args)-1]
+			}
+			sset.add(a.ID) // record that a is used
+		}
+		if v.Op == OpNilCheck {
+			hasNilCheck = true
+		}
+	}
+	if len(stores) == 0 || !hasNilCheck && f.pass.name == "nilcheckelim" {
+		// there is no store, or no nilcheck in the nilcheckelim pass; the order does not matter
+		return values
+	}
+
+	// find last store, which is the one that is not used by other stores
+	var last *Value
+	for _, v := range stores {
+		if !sset.contains(v.ID) {
+			if last != nil {
+				f.Fatalf("two stores live simultaneously: %v and %v", v, last)
+			}
+			last = v
+		}
+	}
+
+	// We assign a store number to each value. Store number is the
+	// index of the latest store that this value transitively depends on.
+	// The i-th store in the current block gets store number 3*i. A nil
+	// check that depends on the i-th store gets store number 3*i+1.
+	// Other values that depend on the i-th store get store number 3*i+2.
+	// Special case: 0 -- unassigned, 1 or 2 -- the latest store it depends on
+	// is in the previous block (or no store at all, e.g. value is Const).
+	// First we assign the number to all stores by walking back the store chain,
+	// then assign the number to other values in DFS order.
+	count := make([]int32, 3*(len(stores)+1))
+	sset.clear() // reuse sparse set to ensure that a value is pushed to stack only once
+	for n, w := len(stores), last; n > 0; n-- {
+		storeNumber[w.ID] = int32(3 * n)
+		count[3*n]++
+		sset.add(w.ID)
+		if w.Op == OpInitMem || w.Op == OpPhi {
+			if n != 1 {
+				f.Fatalf("store order is wrong: there are stores before %v", w)
+			}
+			break
+		}
+		if w.Op == OpSelect1 {
+			w = w.Args[0]
+		}
+		w = w.Args[len(w.Args)-1]
+	}
+	var stack []*Value
+	for _, v := range values {
+		if sset.contains(v.ID) {
+			// in sset means v is a store, or already pushed to stack, or already assigned a store number
+			continue
+		}
+		stack = append(stack, v)
+		sset.add(v.ID)
+
+		for len(stack) > 0 {
+			w := stack[len(stack)-1]
+			if storeNumber[w.ID] != 0 {
+				stack = stack[:len(stack)-1]
+				continue
+			}
+			if w.Op == OpPhi {
+				// Phi value doesn't depend on store in the current block.
+				// Do this early to avoid dependency cycle.
+				storeNumber[w.ID] = 2
+				count[2]++
+				stack = stack[:len(stack)-1]
+				continue
+			}
+
+			max := int32(0) // latest store dependency
+			argsdone := true
+			for _, a := range w.Args {
+				if a.Block != w.Block {
+					continue
+				}
+				if !sset.contains(a.ID) {
+					stack = append(stack, a)
+					sset.add(a.ID)
+					argsdone = false
+					continue
+				}
+				if storeNumber[a.ID]/3 > max {
+					max = storeNumber[a.ID] / 3
+				}
+			}
+			if !argsdone {
+				continue
+			}
+
+			n := 3*max + 2
+			if w.Op == OpNilCheck {
+				n = 3*max + 1
+			}
+			storeNumber[w.ID] = n
+			count[n]++
+			stack = stack[:len(stack)-1]
+		}
+	}
+
+	// convert count to prefix sum of counts: count'[i] = sum_{j<=i} count[j]
+	for i := range count {
+		if i == 0 {
+			continue
+		}
+		count[i] += count[i-1]
+	}
+	if count[len(count)-1] != int32(len(values)) {
+		f.Fatalf("storeOrder: value is missing, total count = %d, values = %v", count[len(count)-1], values)
+	}
+
+	// place values in count-indexed bins, which are in the desired store order
+	order := make([]*Value, len(values))
+	for _, v := range values {
+		s := storeNumber[v.ID]
+		order[count[s-1]] = v
+		count[s-1]++
+	}
+
+	return order
+}
diff --git a/src/cmd/compile/internal/ssa/writebarrier.go b/src/cmd/compile/internal/ssa/writebarrier.go
index 899e4faded..d2539bd3b0 100644
--- a/src/cmd/compile/internal/ssa/writebarrier.go
+++ b/src/cmd/compile/internal/ssa/writebarrier.go
@@ -22,21 +22,18 @@ import (
 // and a normal store will be used.
 // A sequence of WB stores for many pointer fields of a single type will
 // be emitted together, with a single branch.
-//
-// Expanding WB ops introduces new control flows, and we would need to
-// split a block into two if there were values after WB ops, which would
-// require scheduling the values. To avoid this complexity, when building
-// SSA, we make sure that WB ops are always at the end of a block. We do
-// this before fuse as it may merge blocks. It also helps to reduce
-// number of blocks as fuse merges blocks introduced in this phase.
 func writebarrier(f *Func) {
-	var sb, sp, wbaddr *Value
+	var sb, sp, wbaddr, const0 *Value
 	var writebarrierptr, typedmemmove, typedmemclr *obj.LSym
-	var storeWBs, others []*Value
-	var wbs *sparseSet
-	for _, b := range f.Blocks { // range loop is safe since the blocks we added contain no WB stores
-	valueLoop:
-		for i, v := range b.Values {
+	var stores, after []*Value
+	var sset *sparseSet
+	var storeNumber []int32
+
+	for _, b := range f.Blocks { // range loop is safe since the blocks we added contain no stores to expand
+		// rewrite write barrier for stack writes to ordinary Store/Move/Zero,
+		// record presence of non-stack WB ops.
+		hasStore := false
+		for _, v := range b.Values {
 			switch v.Op {
 			case OpStoreWB, OpMoveWB, OpMoveWBVolatile, OpZeroWB:
 				if IsStackAddr(v.Args[0]) {
@@ -52,187 +49,182 @@ func writebarrier(f *Func) {
 					}
 					continue
 				}
+				hasStore = true
+				break
+			}
+		}
+		if !hasStore {
+			continue
+		}

-				if wbaddr == nil {
-					// initalize global values for write barrier test and calls
-					// find SB and SP values in entry block
-					initln := f.Entry.Pos
-					for _, v := range f.Entry.Values {
-						if v.Op == OpSB {
-							sb = v
-						}
-						if v.Op == OpSP {
-							sp = v
-						}
-					}
-					if sb == nil {
-						sb = f.Entry.NewValue0(initln, OpSB, f.Config.fe.TypeUintptr())
-					}
-					if sp == nil {
-						sp = f.Entry.NewValue0(initln, OpSP, f.Config.fe.TypeUintptr())
-					}
-					wbsym := &ExternSymbol{Typ: f.Config.fe.TypeBool(), Sym: f.Config.fe.Syslook("writeBarrier")}
-					wbaddr = f.Entry.NewValue1A(initln, OpAddr, f.Config.fe.TypeUInt32().PtrTo(), wbsym, sb)
-					writebarrierptr = f.Config.fe.Syslook("writebarrierptr")
-					typedmemmove = f.Config.fe.Syslook("typedmemmove")
-					typedmemclr = f.Config.fe.Syslook("typedmemclr")
-
-					wbs = f.newSparseSet(f.NumValues())
-					defer f.retSparseSet(wbs)
+		if wbaddr == nil {
+			// lazily initialize global values for write barrier test and calls
+			// find SB and SP values in entry block
+			initpos := f.Entry.Pos
+			for _, v := range f.Entry.Values {
+				if v.Op == OpSB {
+					sb = v
+				}
+				if v.Op == OpSP {
+					sp = v
+				}
+				if sb != nil && sp != nil {
+					break
+				}
+			}
+			if sb == nil {
+				sb = f.Entry.NewValue0(initpos, OpSB, f.Config.fe.TypeUintptr())
+			}
+			if sp == nil {
+				sp = f.Entry.NewValue0(initpos, OpSP, f.Config.fe.TypeUintptr())
+			}
+			wbsym := &ExternSymbol{Typ: f.Config.fe.TypeBool(), Sym: f.Config.fe.Syslook("writeBarrier")}
+			wbaddr = f.Entry.NewValue1A(initpos, OpAddr, f.Config.fe.TypeUInt32().PtrTo(), wbsym, sb)
+			writebarrierptr = f.Config.fe.Syslook("writebarrierptr")
+			typedmemmove = f.Config.fe.Syslook("typedmemmove")
+			typedmemclr = f.Config.fe.Syslook("typedmemclr")
+			const0 = f.ConstInt32(initpos, f.Config.fe.TypeUInt32(), 0)

-				pos := v.Pos
+			// allocate auxiliary data structures for computing store order
+			sset = f.newSparseSet(f.NumValues())
+			defer f.retSparseSet(sset)
+			storeNumber = make([]int32, f.NumValues())
+		}

-				// there may be a sequence of WB stores in the current block. find them.
-				storeWBs = storeWBs[:0]
-				others = others[:0]
-				wbs.clear()
-				for _, w := range b.Values[i:] {
-					if w.Op == OpStoreWB || w.Op == OpMoveWB || w.Op == OpMoveWBVolatile || w.Op == OpZeroWB {
-						storeWBs = append(storeWBs, w)
-						wbs.add(w.ID)
-					} else {
-						others = append(others, w)
-					}
-				}
-
-				// make sure that no value in this block depends on WB stores
-				for _, w := range b.Values {
-					if w.Op == OpStoreWB || w.Op == OpMoveWB || w.Op == OpMoveWBVolatile || w.Op == OpZeroWB {
-						continue
-					}
-					for _, a := range w.Args {
-						if wbs.contains(a.ID) {
-							f.Fatalf("value %v depends on WB store %v in the same block %v", w, a, b)
-						}
-					}
+		// order values in store order
+		b.Values = storeOrder(b.Values, sset, storeNumber)
+
+	again:
+		// find the start and end of the last contiguous WB store sequence.
+		// a branch will be inserted there. values after it will be moved
+		// to a new block.
+		var last *Value
+		var start, end int
+		values := b.Values
+		for i := len(values) - 1; i >= 0; i-- {
+			w := values[i]
+			if w.Op == OpStoreWB || w.Op == OpMoveWB || w.Op == OpMoveWBVolatile || w.Op == OpZeroWB {
+				if last == nil {
+					last = w
+					end = i + 1
+				}
+			} else {
+				if last != nil {
+					start = i + 1
+					break
+				}
+			}
+		}
+		stores = append(stores[:0], b.Values[start:end]...) // copy to avoid aliasing
+		after = append(after[:0], b.Values[end:]...)
+		b.Values = b.Values[:start]

-				// find the memory before the WB stores
-				// this memory is not a WB store but it is used in a WB store.
-				var mem *Value
-				for _, w := range storeWBs {
-					a := w.Args[len(w.Args)-1]
-					if wbs.contains(a.ID) {
-						continue
-					}
-					if mem != nil {
-						b.Fatalf("two stores live simultaneously: %s, %s", mem, a)
-					}
-					mem = a
-				}
-
-				b.Values = append(b.Values[:i], others...) // move WB ops out of this block
+		// find the memory before the WB stores
+		mem := stores[0].Args[len(stores[0].Args)-1]
+		pos := stores[0].Pos

+		bThen := f.NewBlock(BlockPlain)
+		bElse := f.NewBlock(BlockPlain)
+		bEnd := f.NewBlock(b.Kind)
+		bThen.Pos = pos
+		bElse.Pos = pos
+		bEnd.Pos = b.Pos
+		b.Pos = pos
-				bThen := f.NewBlock(BlockPlain)
-				bElse := f.NewBlock(BlockPlain)
-				bEnd := f.NewBlock(b.Kind)
-				bThen.Pos = pos
-				bElse.Pos = pos
-				bEnd.Pos = pos

+		// set up control flow for end block
+		bEnd.SetControl(b.Control)
+		bEnd.Likely = b.Likely
+		for _, e := range b.Succs {
+			bEnd.Succs = append(bEnd.Succs, e)
+			e.b.Preds[e.i].b = bEnd
+		}
-				// set up control flow for end block
-				bEnd.SetControl(b.Control)
-				bEnd.Likely = b.Likely
-				for _, e := range b.Succs {
-					bEnd.Succs = append(bEnd.Succs, e)
-					e.b.Preds[e.i].b = bEnd
-				}

+		// set up control flow for write barrier test
+		// load word, test word, avoiding partial register write from load byte.
+		flag := b.NewValue2(pos, OpLoad, f.Config.fe.TypeUInt32(), wbaddr, mem)
+		flag = b.NewValue2(pos, OpNeq32, f.Config.fe.TypeBool(), flag, const0)
+		b.Kind = BlockIf
+		b.SetControl(flag)
+		b.Likely = BranchUnlikely
+		b.Succs = b.Succs[:0]
+		b.AddEdgeTo(bThen)
+		b.AddEdgeTo(bElse)
+		bThen.AddEdgeTo(bEnd)
+		bElse.AddEdgeTo(bEnd)
-				// set up control flow for write barrier test
-				// load word, test word, avoiding partial register write from load byte.
-				flag := b.NewValue2(pos, OpLoad, f.Config.fe.TypeUInt32(), wbaddr, mem)
-				const0 := f.ConstInt32(pos, f.Config.fe.TypeUInt32(), 0)
-				flag = b.NewValue2(pos, OpNeq32, f.Config.fe.TypeBool(), flag, const0)
-				b.Kind = BlockIf
-				b.SetControl(flag)
-				b.Likely = BranchUnlikely
-				b.Succs = b.Succs[:0]
-				b.AddEdgeTo(bThen)
-				b.AddEdgeTo(bElse)
-				bThen.AddEdgeTo(bEnd)
-				bElse.AddEdgeTo(bEnd)

+		// for each write barrier store, append write barrier version to bThen
+		// and simple store version to bElse
+		memThen := mem
+		memElse := mem
+		for _, w := range stores {
+			var val *Value
+			ptr := w.Args[0]
+			siz := w.AuxInt
+			typ := w.Aux // only non-nil for MoveWB, MoveWBVolatile, ZeroWB
+			pos = w.Pos
-				memThen := mem
-				memElse := mem
-				for _, w := range storeWBs {
-					var val *Value
-					ptr := w.Args[0]
-					siz := w.AuxInt
-					typ := w.Aux // only non-nil for MoveWB, MoveWBVolatile, ZeroWB

+			var op Op
+			var fn *obj.LSym
+			switch w.Op {
+			case OpStoreWB:
+				op = OpStore
+				fn = writebarrierptr
+				val = w.Args[1]
+			case OpMoveWB, OpMoveWBVolatile:
+				op = OpMove
+				fn = typedmemmove
+				val = w.Args[1]
+			case OpZeroWB:
+				op = OpZero
+				fn = typedmemclr
+			}
-					var op Op
-					var fn *obj.LSym
-					switch w.Op {
-					case OpStoreWB:
-						op = OpStore
-						fn = writebarrierptr
-						val = w.Args[1]
-					case OpMoveWB, OpMoveWBVolatile:
-						op = OpMove
-						fn = typedmemmove
-						val = w.Args[1]
-					case OpZeroWB:
-						op = OpZero
-						fn = typedmemclr
-					}

+			// then block: emit write barrier call
+			memThen = wbcall(pos, bThen, fn, typ, ptr, val, memThen, sp, sb, w.Op == OpMoveWBVolatile)
-					// then block: emit write barrier call
-					memThen = wbcall(pos, bThen, fn, typ, ptr, val, memThen, sp, sb, w.Op == OpMoveWBVolatile)

+			// else block: normal store
+			if op == OpZero {
+				memElse = bElse.NewValue2I(pos, op, TypeMem, siz, ptr, memElse)
+			} else {
+				memElse = bElse.NewValue3I(pos, op, TypeMem, siz, ptr, val, memElse)
+			}
-					// else block: normal store
-					if op == OpZero {
-						memElse = bElse.NewValue2I(pos, op, TypeMem, siz, ptr, memElse)
-					} else {
-						memElse = bElse.NewValue3I(pos, op, TypeMem, siz, ptr, val, memElse)
-					}
-				}

+			if f.Config.fe.Debug_wb() {
+				f.Config.Warnl(pos, "write barrier")
+			}
+		}

-				// merge memory
-				// Splice memory Phi into the last memory of the original sequence,
-				// which may be used in subsequent blocks. Other memories in the
-				// sequence must be dead after this block since there can be only
-				// one memory live.
-				last := storeWBs[0]
-				if len(storeWBs) > 1 {
-					// find the last store
-					last = nil
-					wbs.clear() // we reuse wbs to record WB stores that is used in another WB store
-					for _, w := range storeWBs {
-						wbs.add(w.Args[len(w.Args)-1].ID)
-					}
-					for _, w := range storeWBs {
-						if wbs.contains(w.ID) {
-							continue
-						}
-						if last != nil {
-							b.Fatalf("two stores live simultaneously: %s, %s", last, w)
-						}
-						last = w
-					}
-				}
-				bEnd.Values = append(bEnd.Values, last)
-				last.Block = bEnd
-				last.reset(OpPhi)
-				last.Type = TypeMem
-				last.AddArg(memThen)
-				last.AddArg(memElse)
-				for _, w := range storeWBs {
-					if w != last {
-						w.resetArgs()
-					}
-				}
-				for _, w := range storeWBs {
-					if w != last {
-						f.freeValue(w)
-					}
-				}

+		// merge memory
+		// Splice memory Phi into the last memory of the original sequence,
+		// which may be used in subsequent blocks. Other memories in the
+		// sequence must be dead after this block since there can be only
+		// one memory live.
+		bEnd.Values = append(bEnd.Values, last)
+		last.Block = bEnd
+		last.reset(OpPhi)
+		last.Type = TypeMem
+		last.AddArg(memThen)
+		last.AddArg(memElse)
+		for _, w := range stores {
+			if w != last {
+				w.resetArgs()
+			}
+		}
+		for _, w := range stores {
+			if w != last {
+				f.freeValue(w)
+			}
+		}

-				if f.Config.fe.Debug_wb() {
-					f.Config.Warnl(pos, "write barrier")
-				}
+		// put values after the store sequence into the end block
+		bEnd.Values = append(bEnd.Values, after...)
+		for _, w := range after {
+			w.Block = bEnd
+		}

-				break valueLoop
+		// if we have more stores in this block, do this block again
+		for _, w := range b.Values {
+			if w.Op == OpStoreWB || w.Op == OpMoveWB || w.Op == OpMoveWBVolatile || w.Op == OpZeroWB {
+				goto again
+			}
+		}
 	}
 }
-- 
2.48.1
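
Two illustrative sketches follow the patch. First, the counting-sort step at
the heart of storeOrder. This is a standalone Go sketch, not compiler code:
the value names and their precomputed store numbers are made up for
illustration (the patch computes them via the store-chain walk and DFS),
but the prefix-sum and binning loops mirror the patch's final loops over
count and order.

// storeorder_sketch.go: standalone illustration of the counting-sort
// placement used by storeOrder. Not compiler code; the values and their
// store numbers below are made up.
package main

import "fmt"

type value struct {
	name string
	num  int32 // precomputed store number (the 3*i / 3*i+1 / 3*i+2 scheme)
}

func main() {
	values := []value{
		{"IsNonNil", 2}, {"store1", 3}, {"NilCheck1", 4},
		{"add", 5}, {"store2", 6}, {"load", 8},
	}

	// count[n] = number of values with store number n
	count := make([]int32, 10)
	for _, v := range values {
		count[v.num]++
	}
	// prefix sum: count[n] = number of values with store number <= n,
	// so count[n-1] is the first free slot for store number n
	for i := 1; i < len(count); i++ {
		count[i] += count[i-1]
	}
	// place each value into its bin, as in the patch's final loop
	order := make([]value, len(values))
	for _, v := range values {
		order[count[v.num-1]] = v
		count[v.num-1]++
	}
	fmt.Println(order)
	// [{IsNonNil 2} {store1 3} {NilCheck1 4} {add 5} {store2 6} {load 8}]
}

Because every value already carries its store number, a single linear pass
places it, which is what makes this cheaper than the full scheduling pass.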
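Second, the control-flow shape the rewritten writebarrier pass emits for a
contiguous run of WB stores. This is ordinary Go standing in for generated
SSA: writeBarrierEnabled and wbStore are hypothetical stand-ins for the
runtime's writeBarrier flag (loaded once, tested with a single unlikely
branch) and its writebarrierptr call; the two arms correspond to bThen and
bElse, merging at bEnd where the patch splices in the memory Phi.

// wbshape_sketch.go: source-level sketch of the code shape the
// writebarrier pass generates for two consecutive WB stores. The real
// pass manipulates SSA blocks, not if statements.
package main

import "fmt"

var writeBarrierEnabled = false // stands in for the runtime writeBarrier flag

// wbStore stands in for the write barrier runtime call: notify the GC
// of the pointer write, then perform the store.
func wbStore(slot **int, ptr *int) {
	fmt.Println("write barrier fired")
	*slot = ptr
}

// storePair is the shape emitted for a run of two WB stores: one flag
// test guards the whole sequence.
func storePair(a, b **int, x, y *int) {
	if writeBarrierEnabled { // unlikely branch, like BranchUnlikely
		wbStore(a, x) // bThen: write barrier versions
		wbStore(b, y)
	} else {
		*a = x // bElse: ordinary stores
		*b = y
	}
	// bEnd: both arms merge here; the last store becomes a memory Phi
}

func main() {
	var p, q *int
	v, w := 1, 2
	storePair(&p, &q, &v, &w)
	fmt.Println(*p, *q) // 1 2
}

Guarding the whole run with one branch means n consecutive pointer stores
cost a single flag test rather than n, which is why the pass batches the
last contiguous WB store sequence before splitting the block.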