From b91cc5303364c4aae758ff1f0b4efc66b7802700 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Alexandru=20Mo=C8=99oi?= <mosoi@google.com>
Date: Wed, 2 Mar 2016 12:58:27 +0100
Subject: [PATCH] cmd/compile/internal/ssa: BCE for induction variables
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

There are 5293 loop in the main go repository.
A survey of the top most common for loops:

     18 for __k__ := 0; i < len(sa.Addr); i++ {
     19 for __k__ := 0; ; i++ {
     19 for __k__ := 0; i < 16; i++ {
     25 for __k__ := 0; i < length; i++ {
     30 for __k__ := 0; i < 8; i++ {
     49 for __k__ := 0; i < len(s); i++ {
     67 for __k__ := 0; i < n; i++ {
    376 for __k__ := range __slice__ {
    685 for __k__, __v__ := range __slice__ {
   2074 for __, __v__ := range __slice__ {

The algorithm to find induction variables handles all cases
with an upper limit. It currently doesn't find related induction
variables such as c * ind or c + ind.

842 out of 22954 bound checks are removed for src/make.bash.
1957 out of 42952 bounds checks are removed for src/all.bash.

Things to do in follow-up CLs:
* Find the associated pointer for `for _, v := range a {}`
* Drop the NilChecks on the pointer.
* Replace the implicit induction variable by a loop over the pointer

Generated garbage can be reduced if we share the sdom between passes.

% benchstat old.txt new.txt
name       old time/op     new time/op     delta
Template       337ms Â± 3%      333ms Â± 3%    ~             (p=0.258 n=9+9)
GoTypes        1.11s Â± 2%      1.10s Â± 2%    ~           (p=0.912 n=10+10)
Compiler       5.25s Â± 1%      5.29s Â± 2%    ~             (p=0.077 n=9+9)
MakeBash       33.5s Â± 1%      34.1s Â± 2%  +1.85%          (p=0.011 n=9+9)

name       old alloc/op    new alloc/op    delta
Template      63.6MB Â± 0%     63.9MB Â± 0%  +0.52%         (p=0.000 n=10+9)
GoTypes        218MB Â± 0%      219MB Â± 0%  +0.59%         (p=0.000 n=10+9)
Compiler       978MB Â± 0%      985MB Â± 0%  +0.69%        (p=0.000 n=10+10)

name       old allocs/op   new allocs/op   delta
Template        582k Â± 0%       583k Â± 0%  +0.10%        (p=0.000 n=10+10)
GoTypes        1.78M Â± 0%      1.78M Â± 0%  +0.12%        (p=0.000 n=10+10)
Compiler       7.68M Â± 0%      7.69M Â± 0%  +0.05%        (p=0.000 n=10+10)

name       old text-bytes  new text-bytes  delta
HelloSize       581k Â± 0%       581k Â± 0%  -0.08%        (p=0.000 n=10+10)
CmdGoSize      6.40M Â± 0%      6.39M Â± 0%  -0.08%        (p=0.000 n=10+10)

name       old data-bytes  new data-bytes  delta
HelloSize      3.66k Â± 0%      3.66k Â± 0%    ~     (all samples are equal)
CmdGoSize       134k Â± 0%       134k Â± 0%    ~     (all samples are equal)

name       old bss-bytes   new bss-bytes   delta
HelloSize       126k Â± 0%       126k Â± 0%    ~     (all samples are equal)
CmdGoSize       149k Â± 0%       149k Â± 0%    ~     (all samples are equal)

name       old exe-bytes   new exe-bytes   delta
HelloSize       947k Â± 0%       946k Â± 0%  -0.01%        (p=0.000 n=10+10)
CmdGoSize      9.92M Â± 0%      9.91M Â± 0%  -0.06%        (p=0.000 n=10+10)

Change-Id: Ie74bdff46fd602db41bb457333d3a762a0c3dc4d
Reviewed-on: https://go-review.googlesource.com/20517
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Alexandru MoÈoi <alexandru@mosoi.ro>
---
 src/cmd/compile/internal/ssa/compile.go |   1 +
 src/cmd/compile/internal/ssa/loopbce.go | 260 ++++++++++++++++++++++++
 test/loopbce.go                         | 176 ++++++++++++++++
 test/prove.go                           |   4 +-
 4 files changed, 439 insertions(+), 2 deletions(-)
 create mode 100644 src/cmd/compile/internal/ssa/loopbce.go
 create mode 100644 test/loopbce.go

diff --git a/src/cmd/compile/internal/ssa/compile.go b/src/cmd/compile/internal/ssa/compile.go
index d6c2bf83ef..4a880f31f3 100644
--- a/src/cmd/compile/internal/ssa/compile.go
+++ b/src/cmd/compile/internal/ssa/compile.go
@@ -237,6 +237,7 @@ var passes = [...]pass{
 	{name: "phiopt", fn: phiopt},
 	{name: "nilcheckelim", fn: nilcheckelim},
 	{name: "prove", fn: prove},
+	{name: "loopbce", fn: loopbce},
 	{name: "decompose builtin", fn: decomposeBuiltIn, required: true},
 	{name: "dec", fn: dec, required: true},
 	{name: "late opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules
diff --git a/src/cmd/compile/internal/ssa/loopbce.go b/src/cmd/compile/internal/ssa/loopbce.go
new file mode 100644
index 0000000000..7fbb48a7fc
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/loopbce.go
@@ -0,0 +1,260 @@
+package ssa
+
+type indVar struct {
+	ind   *Value // induction variable
+	inc   *Value // increment, a constant
+	nxt   *Value // ind+inc variable
+	min   *Value // minimum value. inclusive,
+	max   *Value // maximum value. exclusive.
+	entry *Block // entry block in the loop.
+	// Invariants: for all blocks dominated by entry:
+	//	min <= ind < max
+	//	min <= nxt <= max
+}
+
+// findIndVar finds induction variables in a function.
+//
+// Look for variables and blocks that satisfy the following
+//
+// loop:
+//   ind = (Phi min nxt),
+//   if ind < max
+//     then goto enter_loop
+//     else goto exit_loop
+//
+//   enter_loop:
+//	do something
+//      nxt = inc + ind
+//	goto loop
+//
+// exit_loop:
+//
+//
+// TODO: handle 32 bit operations
+func findIndVar(f *Func, sdom sparseTree) []indVar {
+	var iv []indVar
+
+nextb:
+	for _, b := range f.Blocks {
+		if b.Kind != BlockIf || len(b.Preds) != 2 {
+			continue
+		}
+
+		var ind, max *Value // induction, and maximum
+		entry := -1         // which successor of b enters the loop
+
+		// Check thet the control if it either ind < max or max > ind.
+		// TODO: Handle Leq64, Geq64.
+		switch b.Control.Op {
+		case OpLess64:
+			entry = 0
+			ind, max = b.Control.Args[0], b.Control.Args[1]
+		case OpGreater64:
+			entry = 0
+			ind, max = b.Control.Args[1], b.Control.Args[0]
+		default:
+			continue nextb
+		}
+
+		// Check that the induction variable is a phi that depends on itself.
+		if ind.Op != OpPhi {
+			continue
+		}
+
+		// Extract min and nxt knowing that nxt is an addition (e.g. Add64).
+		var min, nxt *Value // minimum, and next value
+		if n := ind.Args[0]; n.Op == OpAdd64 && (n.Args[0] == ind || n.Args[1] == ind) {
+			min, nxt = ind.Args[1], n
+		} else if n := ind.Args[1]; n.Op == OpAdd64 && (n.Args[0] == ind || n.Args[1] == ind) {
+			min, nxt = ind.Args[0], n
+		} else {
+			// Not a recognized induction variable.
+			continue
+		}
+
+		var inc *Value
+		if nxt.Args[0] == ind { // nxt = ind + inc
+			inc = nxt.Args[1]
+		} else if nxt.Args[1] == ind { // nxt = inc + ind
+			inc = nxt.Args[0]
+		} else {
+			panic("unreachable") // one of the cases must be true from the above.
+		}
+
+		// Expect the increment to be a positive constant.
+		// TODO: handle negative increment.
+		if inc.Op != OpConst64 || inc.AuxInt <= 0 {
+			continue
+		}
+
+		// Up to now we extracted the induction variable (ind),
+		// the increment delta (inc), the temporary sum (nxt),
+		// the mininum value (min) and the maximum value (max).
+		//
+		// We also know that ind has the form (Phi min nxt) where
+		// nxt is (Add inc nxt) which means: 1) inc dominates nxt
+		// and 2) there is a loop starting at inc and containing nxt.
+		//
+		// We need to prove that the induction variable is incremented
+		// only when it's smaller than the maximum value.
+		// Two conditions must happen listed below to accept ind
+		// as an induction variable.
+
+		// First condition: loop entry has a single predecessor, which
+		// is the header block.  This implies that b.Succs[entry] is
+		// reached iff ind < max.
+		if len(b.Succs[entry].Preds) != 1 {
+			// b.Succs[1-entry] must exit the loop.
+			continue
+		}
+
+		// Second condition: b.Succs[entry] dominates nxt so that
+		// nxt is computed when inc < max, meaning nxt <= max.
+		if !sdom.isAncestorEq(b.Succs[entry], nxt.Block) {
+			// inc+ind can only be reached through the branch that enters the loop.
+			continue
+		}
+
+		// If max is c + SliceLen with c <= 0 then we drop c.
+		// Makes sure c + SliceLen doesn't overflow when SliceLen == 0.
+		// TODO: save c as an offset from max.
+		if w, c := dropAdd64(max); (w.Op == OpStringLen || w.Op == OpSliceLen) && 0 >= c && -c >= 0 {
+			max = w
+		}
+
+		// We can only guarantee that the loops runs withing limits of induction variable
+		// if the increment is 1 or when the limits are constants.
+		if inc.AuxInt != 1 {
+			ok := false
+			if min.Op == OpConst64 && max.Op == OpConst64 {
+				if max.AuxInt > min.AuxInt && max.AuxInt%inc.AuxInt == min.AuxInt%inc.AuxInt { // handle overflow
+					ok = true
+				}
+			}
+			if !ok {
+				continue
+			}
+		}
+
+		if f.pass.debug > 1 {
+			if min.Op == OpConst64 {
+				b.Func.Config.Warnl(b.Line, "Induction variable with minimum %d and increment %d", min.AuxInt, inc.AuxInt)
+			} else {
+				b.Func.Config.Warnl(b.Line, "Induction variable with non-const minimum and increment %d", inc.AuxInt)
+			}
+		}
+
+		iv = append(iv, indVar{
+			ind:   ind,
+			inc:   inc,
+			nxt:   nxt,
+			min:   min,
+			max:   max,
+			entry: b.Succs[entry],
+		})
+		b.Logf("found induction variable %v (inc = %v, min = %v, max = %v)\n", ind, inc, min, max)
+	}
+
+	return iv
+}
+
+// loopbce performs loop based bounds check elimination.
+func loopbce(f *Func) {
+	idom := dominators(f)
+	sdom := newSparseTree(f, idom)
+	ivList := findIndVar(f, sdom)
+
+	m := make(map[*Value]indVar)
+	for _, iv := range ivList {
+		m[iv.ind] = iv
+	}
+
+	removeBoundsChecks(f, sdom, m)
+}
+
+// removesBoundsChecks remove IsInBounds and IsSliceInBounds based on the induction variables.
+func removeBoundsChecks(f *Func, sdom sparseTree, m map[*Value]indVar) {
+	for _, b := range f.Blocks {
+		if b.Kind != BlockIf {
+			continue
+		}
+
+		v := b.Control
+
+		// Simplify:
+		// (IsInBounds ind max) where 0 <= const == min <= ind < max.
+		// (IsSliceInBounds ind max) where 0 <= const == min <= ind < max.
+		// Found in:
+		//	for i := range a {
+		//		use a[i]
+		//		use a[i:]
+		//		use a[:i]
+		//	}
+		if v.Op == OpIsInBounds || v.Op == OpIsSliceInBounds {
+			ind, add := dropAdd64(v.Args[0])
+			if ind.Op != OpPhi {
+				goto skip1
+			}
+			if v.Op == OpIsInBounds && add != 0 {
+				goto skip1
+			}
+			if v.Op == OpIsSliceInBounds && (0 > add || add > 1) {
+				goto skip1
+			}
+
+			if iv, has := m[ind]; has && sdom.isAncestorEq(iv.entry, b) && isNonNegative(iv.min) {
+				if v.Args[1] == iv.max {
+					if f.pass.debug > 0 {
+						f.Config.Warnl(b.Line, "Found redundant %s", v.Op)
+					}
+					goto simplify
+				}
+			}
+		}
+	skip1:
+
+		// Simplify:
+		// (IsSliceInBounds ind (SliceCap a)) where 0 <= min <= ind < max == (SliceLen a)
+		// Found in:
+		//	for i := range a {
+		//		use a[:i]
+		//		use a[:i+1]
+		//	}
+		if v.Op == OpIsSliceInBounds {
+			ind, add := dropAdd64(v.Args[0])
+			if ind.Op != OpPhi {
+				goto skip2
+			}
+			if 0 > add || add > 1 {
+				goto skip2
+			}
+
+			if iv, has := m[ind]; has && sdom.isAncestorEq(iv.entry, b) && isNonNegative(iv.min) {
+				if v.Args[1].Op == OpSliceCap && iv.max.Op == OpSliceLen && v.Args[1].Args[0] == iv.max.Args[0] {
+					if f.pass.debug > 0 {
+						f.Config.Warnl(b.Line, "Found redundant %s (len promoted to cap)", v.Op)
+					}
+					goto simplify
+				}
+			}
+		}
+	skip2:
+
+		continue
+
+	simplify:
+		f.Logf("removing bounds check %v at %v in %s\n", b.Control, b, f.Name)
+		b.Kind = BlockFirst
+		b.SetControl(nil)
+	}
+}
+
+func dropAdd64(v *Value) (*Value, int64) {
+	if v.Op == OpAdd64 && v.Args[0].Op == OpConst64 {
+		return v.Args[1], v.Args[0].AuxInt
+	}
+	if v.Op == OpAdd64 && v.Args[1].Op == OpConst64 {
+		return v.Args[0], v.Args[1].AuxInt
+	}
+	return v, 0
+}
diff --git a/test/loopbce.go b/test/loopbce.go
new file mode 100644
index 0000000000..eb44092705
--- /dev/null
+++ b/test/loopbce.go
@@ -0,0 +1,176 @@
+// +build amd64
+// errorcheck -0 -d=ssa/loopbce/debug=3
+
+package main
+
+func f0a(a []int) int {
+	x := 0
+	for i := range a { // ERROR "Induction variable with minimum 0 and increment 1$"
+		x += a[i] // ERROR "Found redundant IsInBounds$"
+	}
+	return x
+}
+
+func f0b(a []int) int {
+	x := 0
+	for i := range a { // ERROR "Induction variable with minimum 0 and increment 1$"
+		b := a[i:] // ERROR "Found redundant IsSliceInBounds$"
+		x += b[0]
+	}
+	return x
+}
+
+func f0c(a []int) int {
+	x := 0
+	for i := range a { // ERROR "Induction variable with minimum 0 and increment 1$"
+		b := a[:i+1] // ERROR "Found redundant IsSliceInBounds \(len promoted to cap\)$"
+		x += b[0]
+	}
+	return x
+}
+
+func f1(a []int) int {
+	x := 0
+	for _, i := range a { // ERROR "Induction variable with minimum 0 and increment 1$"
+		x += i
+	}
+	return x
+}
+
+func f2(a []int) int {
+	x := 0
+	for i := 1; i < len(a); i++ { // ERROR "Induction variable with minimum 1 and increment 1$"
+		x += a[i] // ERROR "Found redundant IsInBounds$"
+	}
+	return x
+}
+
+func f4(a [10]int) int {
+	x := 0
+	for i := 0; i < len(a); i += 2 { // ERROR "Induction variable with minimum 0 and increment 2$"
+		x += a[i] // ERROR "Found redundant IsInBounds$"
+	}
+	return x
+}
+
+func f5(a [10]int) int {
+	x := 0
+	for i := -10; i < len(a); i += 2 { // ERROR "Induction variable with minimum -10 and increment 2$"
+		x += a[i]
+	}
+	return x
+}
+
+func f6(a []int) {
+	for i := range a { // ERROR "Induction variable with minimum 0 and increment 1$"
+		b := a[0:i] // ERROR "Found redundant IsSliceInBounds \(len promoted to cap\)$"
+		f6(b)
+	}
+}
+
+func g0a(a string) int {
+	x := 0
+	for i := 0; i < len(a); i++ { // ERROR "Induction variable with minimum 0 and increment 1$"
+		x += int(a[i]) // ERROR "Found redundant IsInBounds$"
+	}
+	return x
+}
+
+func g0b(a string) int {
+	x := 0
+	for i := 0; len(a) > i; i++ { // ERROR "Induction variable with minimum 0 and increment 1$"
+		x += int(a[i]) // ERROR "Found redundant IsInBounds$"
+	}
+	return x
+}
+
+func g1() int {
+	a := "evenlength"
+	x := 0
+	for i := 0; i < len(a); i += 2 { // ERROR "Induction variable with minimum 0 and increment 2$"
+		x += int(a[i]) // ERROR "Found redundant IsInBounds$"
+	}
+	return x
+}
+
+func g2() int {
+	a := "evenlength"
+	x := 0
+	for i := 0; i < len(a); i += 2 { // ERROR "Induction variable with minimum 0 and increment 2$"
+		j := i
+		if a[i] == 'e' { // ERROR "Found redundant IsInBounds$"
+			j = j + 1
+		}
+		x += int(a[j])
+	}
+	return x
+}
+
+func g3a() {
+	a := "this string has length 25"
+	for i := 0; i < len(a); i += 5 { // ERROR "Induction variable with minimum 0 and increment 5$"
+		useString(a[i:]) // ERROR "Found redundant IsSliceInBounds$"
+		useString(a[:i+3])
+	}
+}
+
+func g3b(a string) {
+	for i := 0; i < len(a); i++ { // ERROR "Induction variable with minimum 0 and increment 1$"
+		useString(a[i+1:]) // ERROR "Found redundant IsSliceInBounds$"
+	}
+}
+
+func g3c(a string) {
+	for i := 0; i < len(a); i++ { // ERROR "Induction variable with minimum 0 and increment 1$"
+		useString(a[:i+1]) // ERROR "Found redundant IsSliceInBounds$"
+	}
+}
+
+func h1(a []byte) {
+	c := a[:128]
+	for i := range c { // ERROR "Induction variable with minimum 0 and increment 1$"
+		c[i] = byte(i) // ERROR "Found redundant IsInBounds$"
+	}
+}
+
+func h2(a []byte) {
+	for i := range a[:128] { // ERROR "Induction variable with minimum 0 and increment 1$"
+		a[i] = byte(i)
+	}
+}
+
+func nobce1() {
+	// tests overflow of max-min
+	a := int64(9223372036854774057)
+	b := int64(-1547)
+	z := int64(1337)
+
+	if a%z == b%z {
+		panic("invalid test: modulos should differ")
+	}
+
+	for i := b; i < a; i += z {
+		// No induction variable is possible because i will overflow a first iteration.
+		useString("foobar")
+	}
+}
+
+func nobce2(a string) {
+	for i := int64(0); i < int64(len(a)); i++ { // ERROR "Induction variable with minimum 0 and increment 1$"
+		useString(a[i:]) // ERROR "Found redundant IsSliceInBounds$"
+	}
+	for i := int64(0); i < int64(len(a))-31337; i++ { // ERROR "Induction variable with minimum 0 and increment 1$"
+		useString(a[i:]) // ERROR "Found redundant IsSliceInBounds$"
+	}
+	for i := int64(0); i < int64(len(a))+int64(-1<<63); i++ { // ERROR "Induction variable with minimum 0 and increment 1$"
+		// tests an overflow of StringLen-MinInt64
+		useString(a[i:])
+	}
+}
+
+//go:noinline
+func useString(a string) {
+}
+
+func main() {
+}
diff --git a/test/prove.go b/test/prove.go
index fc2908eb03..4fc1d674d8 100644
--- a/test/prove.go
+++ b/test/prove.go
@@ -40,8 +40,8 @@ func f1b(a []int, i int, j uint) int {
 
 func f2(a []int) int {
 	for i := range a {
-		a[i] = i
-		a[i] = i // ERROR "Proved boolean IsInBounds$"
+		a[i+1] = i
+		a[i+1] = i // ERROR "Proved boolean IsInBounds$"
 	}
 	return 34
 }
-- 
2.50.0