cmd/compile: generalize strength reduction of mulq

author Alexandru Moșoi <brtzsnr@gmail.com>

Thu, 24 Mar 2016 21:46:37 +0000 (22:46 +0100)

committer Alexandru Moșoi <alexandru@mosoi.ro>

Wed, 30 Mar 2016 22:27:13 +0000 (22:27 +0000)
author Alexandru Moșoi <brtzsnr@gmail.com>
Thu, 24 Mar 2016 21:46:37 +0000 (22:46 +0100)
committer Alexandru Moșoi <alexandru@mosoi.ro>
Wed, 30 Mar 2016 22:27:13 +0000 (22:27 +0000)
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules

index 7ed2027135fd783210d1e44e7cb2d3c8ce2d82c8..0b1ce13e9e1c1d86045d66ff6a9cfdfc84fa5e1d 100644 (file)
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -583,14 +583,35 @@
  (CMPB (MOVBconst [c]) x) -> (InvertFlags (CMPBconst x [c]))
  
  // strength reduction
+// Assumes the following instruction costs, taken from https://gmplib.org/~tege/x86-timing.pdf:
+//    1 - addq, shlq, leaq, negq
+//    3 - imulq
+// This limits the rewrites to two instructions.
+// TODO: 27 (= 3*9) and 81 (= 9*9), each reachable with two LEAQs.
  (MULQconst [-1] x) -> (NEGQ x)
  (MULQconst [0] _) -> (MOVQconst [0])
  (MULQconst [1] x) -> x
  (MULQconst [3] x) -> (LEAQ2 x x)
  (MULQconst [5] x) -> (LEAQ4 x x)
+(MULQconst [7] x) -> (LEAQ8 (NEGQ <v.Type> x) x)
  (MULQconst [9] x) -> (LEAQ8 x x)
-(MULQconst [24] x) -> (SHLQconst [3] (LEAQ2 <v.Type> x x)) // Useful for [][]T accesses
+(MULQconst [11] x) -> (LEAQ2 x (LEAQ4 <v.Type> x x))
+(MULQconst [13] x) -> (LEAQ4 x (LEAQ2 <v.Type> x x))
+(MULQconst [21] x) -> (LEAQ4 x (LEAQ4 <v.Type> x x))
+(MULQconst [25] x) -> (LEAQ8 x (LEAQ2 <v.Type> x x))
+(MULQconst [37] x) -> (LEAQ4 x (LEAQ8 <v.Type> x x))
+(MULQconst [41] x) -> (LEAQ8 x (LEAQ4 <v.Type> x x))
+(MULQconst [73] x) -> (LEAQ8 x (LEAQ8 <v.Type> x x))
+
  (MULQconst [c] x) && isPowerOfTwo(c) -> (SHLQconst [log2(c)] x)
+(MULQconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x)
+(MULQconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x)
+(MULQconst [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
+(MULQconst [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
+(MULQconst [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
+(MULQconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3)-> (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
+(MULQconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5)-> (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
+(MULQconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9)-> (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
  
  // combine add/shift into LEAQ
  (ADDQ x (SHLQconst [3] y)) -> (LEAQ8 x y)
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go

index 48257f5402f018975741910ab1fa5cb52b9083c9..99ffb66f65b0fda3fe1985fdf0e9f7db370cd5e5 100644 (file)
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -9392,6 +9392,21 @@ func rewriteValueAMD64_OpAMD64MULQconst(v *Value, config *Config) bool {
                 v.AddArg(x)
                 return true
         }
+       // match: (MULQconst [7] x)
+       // cond:
+       // result: (LEAQ8 (NEGQ <v.Type> x) x)
+       for {
+               if v.AuxInt != 7 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v0 := b.NewValue0(v.Line, OpAMD64NEGQ, v.Type)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
         // match: (MULQconst [9] x)
         // cond:
         // result: (LEAQ8 x x)
@@ -9405,22 +9420,118 @@ func rewriteValueAMD64_OpAMD64MULQconst(v *Value, config *Config) bool {
                 v.AddArg(x)
                 return true
         }
-       // match: (MULQconst [24] x)
+       // match: (MULQconst [11] x)
         // cond:
-       // result: (SHLQconst [3] (LEAQ2 <v.Type> x x))
+       // result: (LEAQ2 x (LEAQ4 <v.Type> x x))
         for {
-               if v.AuxInt != 24 {
+               if v.AuxInt != 11 {
                         break
                 }
                 x := v.Args[0]
-               v.reset(OpAMD64SHLQconst)
-               v.AuxInt = 3
+               v.reset(OpAMD64LEAQ2)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Line, OpAMD64LEAQ4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [13] x)
+       // cond:
+       // result: (LEAQ4 x (LEAQ2 <v.Type> x x))
+       for {
+               if v.AuxInt != 13 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Line, OpAMD64LEAQ2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [21] x)
+       // cond:
+       // result: (LEAQ4 x (LEAQ4 <v.Type> x x))
+       for {
+               if v.AuxInt != 21 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Line, OpAMD64LEAQ4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [25] x)
+       // cond:
+       // result: (LEAQ8 x (LEAQ2 <v.Type> x x))
+       for {
+               if v.AuxInt != 25 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AddArg(x)
                 v0 := b.NewValue0(v.Line, OpAMD64LEAQ2, v.Type)
                 v0.AddArg(x)
                 v0.AddArg(x)
                 v.AddArg(v0)
                 return true
         }
+       // match: (MULQconst [37] x)
+       // cond:
+       // result: (LEAQ4 x (LEAQ8 <v.Type> x x))
+       for {
+               if v.AuxInt != 37 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ4)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Line, OpAMD64LEAQ8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [41] x)
+       // cond:
+       // result: (LEAQ8 x (LEAQ4 <v.Type> x x))
+       for {
+               if v.AuxInt != 41 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Line, OpAMD64LEAQ4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [73] x)
+       // cond:
+       // result: (LEAQ8 x (LEAQ8 <v.Type> x x))
+       for {
+               if v.AuxInt != 73 {
+                       break
+               }
+               x := v.Args[0]
+               v.reset(OpAMD64LEAQ8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Line, OpAMD64LEAQ8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
         // match: (MULQconst [c] x)
         // cond: isPowerOfTwo(c)
         // result: (SHLQconst [log2(c)] x)
@@ -9435,6 +9546,142 @@ func rewriteValueAMD64_OpAMD64MULQconst(v *Value, config *Config) bool {
                 v.AddArg(x)
                 return true
         }
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo(c+1) && c >= 15
+       // result: (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c+1) && c >= 15) {
+                       break
+               }
+               v.reset(OpAMD64SUBQ)
+               v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = log2(c + 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo(c-1) && c >= 17
+       // result: (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-1) && c >= 17) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ1)
+               v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = log2(c - 1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo(c-2) && c >= 34
+       // result: (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-2) && c >= 34) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ2)
+               v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = log2(c - 2)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo(c-4) && c >= 68
+       // result: (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-4) && c >= 68) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ4)
+               v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = log2(c - 4)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: isPowerOfTwo(c-8) && c >= 136
+       // result: (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(isPowerOfTwo(c-8) && c >= 136) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ8)
+               v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
+               v0.AuxInt = log2(c - 8)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               v.AddArg(x)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: c%3 == 0 && isPowerOfTwo(c/3)
+       // result: (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = log2(c / 3)
+               v0 := b.NewValue0(v.Line, OpAMD64LEAQ2, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: c%5 == 0 && isPowerOfTwo(c/5)
+       // result: (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = log2(c / 5)
+               v0 := b.NewValue0(v.Line, OpAMD64LEAQ4, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (MULQconst [c] x)
+       // cond: c%9 == 0 && isPowerOfTwo(c/9)
+       // result: (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
+       for {
+               c := v.AuxInt
+               x := v.Args[0]
+               if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQconst)
+               v.AuxInt = log2(c / 9)
+               v0 := b.NewValue0(v.Line, OpAMD64LEAQ8, v.Type)
+               v0.AddArg(x)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
         // match: (MULQconst [c] (MOVQconst [d]))
         // cond:
         // result: (MOVQconst [c*d])
diff --git a/test/strength.go b/test/strength.go

new file mode 100644 (file)

index 0000000..94d589c
--- /dev/null
+++ b/test/strength.go
@@ -0,0 +1,45 @@
+// runoutput
+
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Generate a test of strength reduction for multiplications
+// with constants. Especially useful for amd64/386.
+
+package main
+
+import "fmt"
+
+func testMul(fact, bits int) string {
+       n := fmt.Sprintf("testMul_%d_%d", fact, bits)
+       fmt.Printf("func %s(s int%d) {\n", n, bits)
+
+       want := 0
+       for i := 0; i < 200; i++ {
+               fmt.Printf(`    if want, got := int%d(%d), s*%d; want != got {
+               failed = true
+               fmt.Printf("got %d * %%d == %%d, wanted %d\n",  s, got)
+       }
+`, bits, want, i, i, want)
+               want += fact
+       }
+
+       fmt.Printf("}\n")
+       return fmt.Sprintf("%s(%d)", n, fact)
+}
+
+func main() {
+       fmt.Printf("package main\n")
+       fmt.Printf("import \"fmt\"\n")
+       fmt.Printf("var failed = false\n")
+
+       f1 := testMul(17, 32)
+       f2 := testMul(131, 64)
+
+       fmt.Printf("func main() {\n")
+       fmt.Println(f1)
+       fmt.Println(f2)
+       fmt.Printf("if failed {\n       panic(\"multiplication failed\")\n}\n")
+       fmt.Printf("}\n")
+}
author	Alexandru Moșoi <brtzsnr@gmail.com>
	Thu, 24 Mar 2016 21:46:37 +0000 (22:46 +0100)
committer	Alexandru Moșoi <alexandru@mosoi.ro>
	Wed, 30 Mar 2016 22:27:13 +0000 (22:27 +0000)
src/cmd/compile/internal/ssa/gen/AMD64.rules		patch \| blob \| history
src/cmd/compile/internal/ssa/rewriteAMD64.go		patch \| blob \| history
test/strength.go	[new file with mode: 0644]	patch \| blob