From 553443d41fbe643a29d452a5d4d4ce3b7442b0e9 Mon Sep 17 00:00:00 2001 From: "khr@golang.org" Date: Sat, 18 May 2024 08:01:27 -0700 Subject: [PATCH] cmd/compile: move limit fact table in prove pass to dense encoding Here begins a pretty major rewrite of the prove pass. The fundamental observation is that although keeping facts about relations between two SSA values could use O(n^2) space, keeping facts about relations between an SSA value and constants needs only O(n) space. We can just keep track of min/max for every SSA value at little cost. Redo the limit table to just keep track of limits for all SSA values. Use just a slice instead of a map. It may use more space (but still just O(n) space), but accesses are a lot faster. And with the cache in the compiler, that space will be reused quickly. This is part of my planning to add lots more constant limits in the prove pass. Change-Id: Ie36819fad5631a8b79c3630fe0e819521796551a Reviewed-on: https://go-review.googlesource.com/c/go/+/599255 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase Reviewed-by: Michael Knyszek --- .../compile/internal/ssa/_gen/allocators.go | 27 +++-- src/cmd/compile/internal/ssa/allocators.go | 102 +++++++++++------- src/cmd/compile/internal/ssa/cache.go | 2 +- src/cmd/compile/internal/ssa/prove.go | 64 ++++++----- 4 files changed, 111 insertions(+), 84 deletions(-) diff --git a/src/cmd/compile/internal/ssa/_gen/allocators.go b/src/cmd/compile/internal/ssa/_gen/allocators.go index 5869a61e82..56e6d69a31 100644 --- a/src/cmd/compile/internal/ssa/_gen/allocators.go +++ b/src/cmd/compile/internal/ssa/_gen/allocators.go @@ -46,14 +46,14 @@ func genAllocators() { maxLog: 32, }, { - name: "Int64Slice", - typ: "[]int64", + name: "LimitSlice", + typ: "[]limit", // the limit type is basically [4]uint64. capacity: "cap(%s)", - mak: "make([]int64, %s)", + mak: "make([]limit, %s)", resize: "%s[:%s]", - clear: "for i := range %[1]s {\n%[1]s[i] = 0\n}", - minLog: 5, - maxLog: 32, + clear: "for i := range %[1]s {\n%[1]s[i] = limit{}\n}", + minLog: 3, + maxLog: 30, }, { name: "SparseSet", @@ -92,30 +92,35 @@ func genAllocators() { typ: "[]*Block", base: "ValueSlice", }, + { + name: "Int64", + typ: "[]int64", + base: "LimitSlice", + }, { name: "IntSlice", typ: "[]int", - base: "Int64Slice", + base: "LimitSlice", }, { name: "Int32Slice", typ: "[]int32", - base: "Int64Slice", + base: "LimitSlice", }, { name: "Int8Slice", typ: "[]int8", - base: "Int64Slice", + base: "LimitSlice", }, { name: "BoolSlice", typ: "[]bool", - base: "Int64Slice", + base: "LimitSlice", }, { name: "IDSlice", typ: "[]ID", - base: "Int64Slice", + base: "LimitSlice", }, } diff --git a/src/cmd/compile/internal/ssa/allocators.go b/src/cmd/compile/internal/ssa/allocators.go index ff70795f82..222ae73f2b 100644 --- a/src/cmd/compile/internal/ssa/allocators.go +++ b/src/cmd/compile/internal/ssa/allocators.go @@ -47,42 +47,42 @@ func (c *Cache) freeValueSlice(s []*Value) { poolFreeValueSlice[b-5].Put(sp) } -var poolFreeInt64Slice [27]sync.Pool +var poolFreeLimitSlice [27]sync.Pool -func (c *Cache) allocInt64Slice(n int) []int64 { - var s []int64 +func (c *Cache) allocLimitSlice(n int) []limit { + var s []limit n2 := n - if n2 < 32 { - n2 = 32 + if n2 < 8 { + n2 = 8 } b := bits.Len(uint(n2 - 1)) - v := poolFreeInt64Slice[b-5].Get() + v := poolFreeLimitSlice[b-3].Get() if v == nil { - s = make([]int64, 1<= w && x > min ⇒ x > w // // Useful for i > 0; s[i-1]. - lim, ok := ft.limits[x.ID] - if ok && ((d == signed && lim.min > opMin[v.Op]) || (d == unsigned && lim.umin > 0)) { + lim := ft.limits[x.ID] + if lim != noLimit && ((d == signed && lim.min > opMin[v.Op]) || (d == unsigned && lim.umin > 0)) { ft.update(parent, x, w, d, gt) } } else if x, delta := isConstDelta(w); x != nil && delta == 1 { // v >= x+1 && x < max ⇒ v > x - lim, ok := ft.limits[x.ID] - if ok && ((d == signed && lim.max < opMax[w.Op]) || (d == unsigned && lim.umax < opUMax[w.Op])) { + lim := ft.limits[x.ID] + if lim != noLimit && ((d == signed && lim.max < opMax[w.Op]) || (d == unsigned && lim.umax < opUMax[w.Op])) { ft.update(parent, v, x, d, gt) } } @@ -465,7 +467,7 @@ func (ft *factsTable) update(parent *Block, v, w *Value, d domain, r relation) { parent.Func.Warnl(parent.Pos, "x+d %s w; x:%v %v delta:%v w:%v d:%v", r, x, parent.String(), delta, w.AuxInt, d) } underflow := true - if l, has := ft.limits[x.ID]; has && delta < 0 { + if l := ft.limits[x.ID]; l != noLimit && delta < 0 { if (x.Type.Size() == 8 && l.min >= math.MinInt64-delta) || (x.Type.Size() == 4 && l.min >= math.MinInt32-delta) { underflow = false @@ -543,7 +545,7 @@ func (ft *factsTable) update(parent *Block, v, w *Value, d domain, r relation) { // We know that either x>min OR x<=max. factsTable cannot record OR conditions, // so let's see if we can already prove that one of them is false, in which case // the other must be true - if l, has := ft.limits[x.ID]; has { + if l := ft.limits[x.ID]; l != noLimit { if l.max <= min { if r&eq == 0 || l.max < min { // x>min (x>=min) is impossible, so it must be x<=max @@ -617,13 +619,13 @@ func (ft *factsTable) isNonNegative(v *Value) bool { // Check if the recorded limits can prove that the value is positive - if l, has := ft.limits[v.ID]; has && (l.min >= 0 || l.umax <= uint64(max)) { + if l := ft.limits[v.ID]; l != noLimit && (l.min >= 0 || l.umax <= uint64(max)) { return true } // Check if v = x+delta, and we can use x's limits to prove that it's positive if x, delta := isConstDelta(v); x != nil { - if l, has := ft.limits[x.ID]; has { + if l := ft.limits[x.ID]; l != noLimit { if delta > 0 && l.min >= -delta && l.max <= max-delta { return true } @@ -681,11 +683,7 @@ func (ft *factsTable) restore() { if old.vid == 0 { // checkpointBound break } - if old.limit == noLimit { - delete(ft.limits, old.vid) - } else { - ft.limits[old.vid] = old.limit - } + ft.limits[old.vid] = old.limit } ft.orderS.Undo() ft.orderU.Undo() @@ -765,6 +763,7 @@ func (ft *factsTable) cleanup(f *Func) { } f.retPoset(po) } + f.Cache.freeLimitSlice(ft.limits) } // prove removes redundant BlockIf branches that can be inferred @@ -1261,10 +1260,7 @@ func addBranchRestrictions(ft *factsTable, b *Block, br branch) { c = v val -= off } - old, ok := ft.limits[c.ID] - if !ok { - old = noLimit - } + old := ft.limits[c.ID] ft.limitStack = append(ft.limitStack, limitFact{c.ID, old}) if val < old.min || val > old.max || uint64(val) < old.umin || uint64(val) > old.umax { ft.unsat = true @@ -1473,8 +1469,8 @@ func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) { } // slicemask(x + y) // if x is larger than -y (y is negative), then slicemask is -1. - lim, ok := ft.limits[x.ID] - if !ok { + lim := ft.limits[x.ID] + if lim == noLimit { break } if lim.umin > uint64(-delta) { @@ -1493,8 +1489,8 @@ func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) { // code for CtzNN if we know that the argument is non-zero. // Capture that information here for use in arch-specific optimizations. x := v.Args[0] - lim, ok := ft.limits[x.ID] - if !ok { + lim := ft.limits[x.ID] + if lim == noLimit { break } if lim.umin > 0 || lim.min > 0 || lim.max < 0 { @@ -1542,8 +1538,8 @@ func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) { // Check whether, for a << b, we know that b // is strictly less than the number of bits in a. by := v.Args[1] - lim, ok := ft.limits[by.ID] - if !ok { + lim := ft.limits[by.ID] + if lim == noLimit { break } bits := 8 * v.Args[0].Type.Size() @@ -1563,11 +1559,11 @@ func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) { break } divr := v.Args[1] - divrLim, divrLimok := ft.limits[divr.ID] + divrLim := ft.limits[divr.ID] divd := v.Args[0] - divdLim, divdLimok := ft.limits[divd.ID] - if (divrLimok && (divrLim.max < -1 || divrLim.min > -1)) || - (divdLimok && divdLim.min > mostNegativeDividend[v.Op]) { + divdLim := ft.limits[divd.ID] + if (divrLim != noLimit && (divrLim.max < -1 || divrLim.min > -1)) || + (divdLim != noLimit && divdLim.min > mostNegativeDividend[v.Op]) { // See DivisionNeedsFixUp in rewrite.go. // v.AuxInt = 1 means we have proved both that the divisor is not -1 // and that the dividend is not the most negative integer, @@ -1585,8 +1581,8 @@ func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) { case OpConst64, OpConst32, OpConst16, OpConst8: continue } - lim, ok := ft.limits[arg.ID] - if !ok { + lim := ft.limits[arg.ID] + if lim == noLimit { continue } -- 2.48.1