last := f.Cache.allocBoolSlice(f.NumValues())
defer f.Cache.freeBoolSlice(last)
+ type stChainElem struct {
+ v *Value
+ i int // Index in chain (0 == last store)
+ }
+ var order []stChainElem
+
// prevStore returns the previous store in the
// same block, or nil if there are none.
prevStore := func(v *Value) *Value {
return m
}
+ // storeWidth returns the width of store,
+ // or 0 if it is not a store
+ storeWidth := func(op Op) int64 {
+ var width int64
+ switch op {
+ case OpARM64MOVDstore, OpARM64FMOVDstore:
+ width = 8
+ case OpARM64MOVWstore, OpARM64FMOVSstore:
+ width = 4
+ case OpARM64MOVHstore:
+ width = 2
+ case OpARM64MOVBstore:
+ width = 1
+ default:
+ width = 0
+ }
+ return width
+ }
+
+ const limit = 10
+
for _, b := range f.Blocks {
// Find last store in block, so we can
// walk the stores last to first.
}
}
+ order = order[:0]
+ for i, v := 0, lastMem; v != nil; v = prevStore(v) {
+ order = append(order, stChainElem{v, i})
+ i++
+ }
+ reordering:
+ for i, v_elem := range order {
+ v := v_elem.v
+ if v.Uses != 1 {
+ // We can't reorder stores if the earlier
+ // store has any use besides the next one
+ // in the store chain.
+ // (Unless we could check the aliasing of
+ // all those other uses.)
+ continue
+ }
+ widthV := storeWidth(v.Op)
+ if widthV == 0 {
+ // Can't reorder with any other memory operations.
+ // (atomics, calls, ...)
+ continue
+ }
+ chain := order[i+1:]
+ count := limit
+ // Var 'count' keeps us in O(n) territory
+ for j, w_elem := range chain {
+ if count--; count == 0 {
+ // Only look back so far.
+ // This keeps us in O(n) territory, and it
+ // also prevents us from keeping values
+ // in registers for too long (and thus
+ // needing to spill them).
+ continue reordering
+ }
+
+ w := w_elem.v
+ if w.Uses != 1 {
+ // We can't reorder stores if the earlier
+ // store has any use besides the next one
+ // in the store chain.
+ // (Unless we could check the aliasing of
+ // all those other uses.)
+ continue reordering
+ }
+
+ widthW := storeWidth(w.Op)
+ if widthW == 0 {
+ // Can't reorder with any other memory operations.
+ // (atomics, calls, ...)
+ continue reordering
+ }
+
+ // We only allow reordering with respect to other
+ // writes to the same pointer and aux, so we can
+ // compute the exact the aliasing relationship.
+ if w.Args[0] != v.Args[0] ||
+ w.Aux != v.Aux {
+ // Can't reorder with operation with incomparable destination memory pointer.
+ continue reordering
+ }
+ if overlap(w.AuxInt, widthW, v.AuxInt, widthV) {
+ // Aliases with the same slot with v's location.
+ continue reordering
+ }
+
+ // Reordering stores in increasing order of memory access
+ if v.AuxInt < w.AuxInt {
+ order[i], order[i+j+1] = order[i+j+1], order[i]
+ v = w
+ widthV = widthW
+ }
+ }
+ }
+
// Check all stores, from last to first.
memCheck:
- for v := lastMem; v != nil; v = prevStore(v) {
+ for i, v_elem := range order {
+ v := v_elem.v
info := pairableStores[v.Op]
if info.width == 0 {
continue // Not pairable.
// Look for earlier store we can combine with.
lowerOk := true
higherOk := true
- count := 10 // max lookback distance
- for w := prevStore(v); w != nil; w = prevStore(w) {
+ count := limit // max lookback distance
+ chain := order[i+1:]
+ for _, w_elem := range chain {
+ w := w_elem.v
if w.Uses != 1 {
// We can't combine stores if the earlier
// store has any use besides the next one
args[1], args[2] = args[2], args[1]
off -= info.width
}
+
v.reset(info.pair)
v.AddArgs(args...)
v.Aux = aux
v.AuxInt = off
- v.Pos = w.Pos // take position of earlier of the two stores (TODO: not really working?)
+ // Take position of earlier of the two stores
+ if v_elem.i < w_elem.i {
+ v.Pos = w.Pos
+ } else {
+ w.Pos = v.Pos
+ }
// Make w just a memory copy.
wmem := w.MemoryArg()
}
func dwstoreBig(p *struct{ a, b, c, d, e, f int64 }, a, b, c, d, e, f int64) {
- // This is not perfect. We merge b+a, then d+e, then c and f have no pair.
+ // arm64:`STP\s\(R[0-9]+, R[0-9]+\), 16\(R[0-9]+\)`
p.c = c
+ // arm64:`STP\s\(R[0-9]+, R[0-9]+\), 32\(R[0-9]+\)`
p.f = f
// arm64:`STP\s\(R[0-9]+, R[0-9]+\), \(R[0-9]+\)`
p.a = a
- // arm64:`STP\s\(R[0-9]+, R[0-9]+\), 24\(R[0-9]+\)`
p.e = e
p.d = d
p.b = b
}
+func dwstoreUnorderedArray(p *struct{ a, b, c, d int }, a, b, c, d int) {
+ // arm64:`STP\s\(R[0-9]+, R[0-9]+\), 16\(R[0-9]+\)`
+ p.c = c
+ p.d = d
+ // arm64:`STP\s\(R[0-9]+, R[0-9]+\), \(R[0-9]+\)`
+ p.a = a
+ p.b = b
+}
+
+func dwstoreBigNil(p *struct{ i, j struct{ a, b, c int } }) {
+ // arm64:`STP\s\(ZR, ZR\), 32\(R[0-9]+\)`
+ // arm64:`STP\s\(ZR, ZR\), 16\(R[0-9]+\)`
+ p.j = struct{ a, b, c int }{}
+ // arm64:`STP\s\(ZR, ZR\), \(R[0-9]+\)`
+ p.i = struct{ a, b, c int }{}
+}
+
func dwstoreRet() [2]int {
// arm64:"STP\t"
return [2]int{5, 6}