(CMPB (MOVBconst [c]) x) -> (InvertFlags (CMPBconst x [c]))
// strength reduction
+// Assumes the following instruction costs (cycles) from https://gmplib.org/~tege/x86-timing.pdf:
+// 1 - addq, shlq, leaq, negq
+// 3 - imulq
+// This limits profitable rewrites to sequences of at most two of the cheap instructions.
+// TODO: 27, 81
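+// (LEAQk x y) computes x + k*y in a single instruction, so the constants below
+// decompose as, for example, 7*x = 8*x - x = (LEAQ8 (NEGQ x) x) and
+// 11*x = x + 2*(x + 4*x) = (LEAQ2 x (LEAQ4 x x)).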
(MULQconst [-1] x) -> (NEGQ x)
(MULQconst [0] _) -> (MOVQconst [0])
(MULQconst [1] x) -> x
(MULQconst [3] x) -> (LEAQ2 x x)
(MULQconst [5] x) -> (LEAQ4 x x)
+(MULQconst [7] x) -> (LEAQ8 (NEGQ <v.Type> x) x)
(MULQconst [9] x) -> (LEAQ8 x x)
-(MULQconst [24] x) -> (SHLQconst [3] (LEAQ2 <v.Type> x x)) // Useful for [][]T accesses
+(MULQconst [11] x) -> (LEAQ2 x (LEAQ4 <v.Type> x x))
+(MULQconst [13] x) -> (LEAQ4 x (LEAQ2 <v.Type> x x))
+(MULQconst [21] x) -> (LEAQ4 x (LEAQ4 <v.Type> x x))
+(MULQconst [25] x) -> (LEAQ8 x (LEAQ2 <v.Type> x x))
+(MULQconst [37] x) -> (LEAQ4 x (LEAQ8 <v.Type> x x))
+(MULQconst [41] x) -> (LEAQ8 x (LEAQ4 <v.Type> x x))
+(MULQconst [73] x) -> (LEAQ8 x (LEAQ8 <v.Type> x x))
+
(MULQconst [c] x) && isPowerOfTwo(c) -> (SHLQconst [log2(c)] x)
+(MULQconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x)
+(MULQconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x)
+(MULQconst [c] x) && isPowerOfTwo(c-2) && c >= 34 -> (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
+(MULQconst [c] x) && isPowerOfTwo(c-4) && c >= 68 -> (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
+(MULQconst [c] x) && isPowerOfTwo(c-8) && c >= 136 -> (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
+(MULQconst [c] x) && c%3 == 0 && isPowerOfTwo(c/3) -> (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
+(MULQconst [c] x) && c%5 == 0 && isPowerOfTwo(c/5) -> (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
+(MULQconst [c] x) && c%9 == 0 && isPowerOfTwo(c/9) -> (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
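+// The generic rules above subsume the old dedicated case for 24 ([][]T accesses):
+// 24 = 3<<3, so the c%3 rule rewrites it to (SHLQconst [3] (LEAQ2 x x)), and
+// e.g. 31*x = 32*x - x is handled by the isPowerOfTwo(c+1) rule.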
// combine add/shift into LEAQ
(ADDQ x (SHLQconst [3] y)) -> (LEAQ8 x y)
v.AddArg(x)
return true
}
+ // match: (MULQconst [7] x)
+ // cond:
+ // result: (LEAQ8 (NEGQ <v.Type> x) x)
+ for {
+ if v.AuxInt != 7 {
+ break
+ }
+ x := v.Args[0]
+ v.reset(OpAMD64LEAQ8)
+ v0 := b.NewValue0(v.Line, OpAMD64NEGQ, v.Type)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ v.AddArg(x)
+ return true
+ }
// match: (MULQconst [9] x)
// cond:
// result: (LEAQ8 x x)
v.AddArg(x)
return true
}
- // match: (MULQconst [24] x)
+ // match: (MULQconst [11] x)
// cond:
- // result: (SHLQconst [3] (LEAQ2 <v.Type> x x))
+ // result: (LEAQ2 x (LEAQ4 <v.Type> x x))
for {
- if v.AuxInt != 24 {
+ if v.AuxInt != 11 {
break
}
x := v.Args[0]
- v.reset(OpAMD64SHLQconst)
- v.AuxInt = 3
+ v.reset(OpAMD64LEAQ2)
+ v.AddArg(x)
+ v0 := b.NewValue0(v.Line, OpAMD64LEAQ4, v.Type)
+ v0.AddArg(x)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
+ // match: (MULQconst [13] x)
+ // cond:
+ // result: (LEAQ4 x (LEAQ2 <v.Type> x x))
+ for {
+ if v.AuxInt != 13 {
+ break
+ }
+ x := v.Args[0]
+ v.reset(OpAMD64LEAQ4)
+ v.AddArg(x)
+ v0 := b.NewValue0(v.Line, OpAMD64LEAQ2, v.Type)
+ v0.AddArg(x)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
+ // match: (MULQconst [21] x)
+ // cond:
+ // result: (LEAQ4 x (LEAQ4 <v.Type> x x))
+ for {
+ if v.AuxInt != 21 {
+ break
+ }
+ x := v.Args[0]
+ v.reset(OpAMD64LEAQ4)
+ v.AddArg(x)
+ v0 := b.NewValue0(v.Line, OpAMD64LEAQ4, v.Type)
+ v0.AddArg(x)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
+ // match: (MULQconst [25] x)
+ // cond:
+ // result: (LEAQ8 x (LEAQ2 <v.Type> x x))
+ for {
+ if v.AuxInt != 25 {
+ break
+ }
+ x := v.Args[0]
+ v.reset(OpAMD64LEAQ8)
+ v.AddArg(x)
v0 := b.NewValue0(v.Line, OpAMD64LEAQ2, v.Type)
v0.AddArg(x)
v0.AddArg(x)
v.AddArg(v0)
return true
}
+ // match: (MULQconst [37] x)
+ // cond:
+ // result: (LEAQ4 x (LEAQ8 <v.Type> x x))
+ for {
+ if v.AuxInt != 37 {
+ break
+ }
+ x := v.Args[0]
+ v.reset(OpAMD64LEAQ4)
+ v.AddArg(x)
+ v0 := b.NewValue0(v.Line, OpAMD64LEAQ8, v.Type)
+ v0.AddArg(x)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
+ // match: (MULQconst [41] x)
+ // cond:
+ // result: (LEAQ8 x (LEAQ4 <v.Type> x x))
+ for {
+ if v.AuxInt != 41 {
+ break
+ }
+ x := v.Args[0]
+ v.reset(OpAMD64LEAQ8)
+ v.AddArg(x)
+ v0 := b.NewValue0(v.Line, OpAMD64LEAQ4, v.Type)
+ v0.AddArg(x)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
+ // match: (MULQconst [73] x)
+ // cond:
+ // result: (LEAQ8 x (LEAQ8 <v.Type> x x))
+ for {
+ if v.AuxInt != 73 {
+ break
+ }
+ x := v.Args[0]
+ v.reset(OpAMD64LEAQ8)
+ v.AddArg(x)
+ v0 := b.NewValue0(v.Line, OpAMD64LEAQ8, v.Type)
+ v0.AddArg(x)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
// match: (MULQconst [c] x)
// cond: isPowerOfTwo(c)
// result: (SHLQconst [log2(c)] x)
v.AddArg(x)
return true
}
+ // match: (MULQconst [c] x)
+ // cond: isPowerOfTwo(c+1) && c >= 15
+ // result: (SUBQ (SHLQconst <v.Type> [log2(c+1)] x) x)
+ for {
+ c := v.AuxInt
+ x := v.Args[0]
+ if !(isPowerOfTwo(c+1) && c >= 15) {
+ break
+ }
+ v.reset(OpAMD64SUBQ)
+ v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
+ v0.AuxInt = log2(c + 1)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ v.AddArg(x)
+ return true
+ }
+ // match: (MULQconst [c] x)
+ // cond: isPowerOfTwo(c-1) && c >= 17
+ // result: (LEAQ1 (SHLQconst <v.Type> [log2(c-1)] x) x)
+ for {
+ c := v.AuxInt
+ x := v.Args[0]
+ if !(isPowerOfTwo(c-1) && c >= 17) {
+ break
+ }
+ v.reset(OpAMD64LEAQ1)
+ v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
+ v0.AuxInt = log2(c - 1)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ v.AddArg(x)
+ return true
+ }
+ // match: (MULQconst [c] x)
+ // cond: isPowerOfTwo(c-2) && c >= 34
+ // result: (LEAQ2 (SHLQconst <v.Type> [log2(c-2)] x) x)
+ for {
+ c := v.AuxInt
+ x := v.Args[0]
+ if !(isPowerOfTwo(c-2) && c >= 34) {
+ break
+ }
+ v.reset(OpAMD64LEAQ2)
+ v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
+ v0.AuxInt = log2(c - 2)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ v.AddArg(x)
+ return true
+ }
+ // match: (MULQconst [c] x)
+ // cond: isPowerOfTwo(c-4) && c >= 68
+ // result: (LEAQ4 (SHLQconst <v.Type> [log2(c-4)] x) x)
+ for {
+ c := v.AuxInt
+ x := v.Args[0]
+ if !(isPowerOfTwo(c-4) && c >= 68) {
+ break
+ }
+ v.reset(OpAMD64LEAQ4)
+ v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
+ v0.AuxInt = log2(c - 4)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ v.AddArg(x)
+ return true
+ }
+ // match: (MULQconst [c] x)
+ // cond: isPowerOfTwo(c-8) && c >= 136
+ // result: (LEAQ8 (SHLQconst <v.Type> [log2(c-8)] x) x)
+ for {
+ c := v.AuxInt
+ x := v.Args[0]
+ if !(isPowerOfTwo(c-8) && c >= 136) {
+ break
+ }
+ v.reset(OpAMD64LEAQ8)
+ v0 := b.NewValue0(v.Line, OpAMD64SHLQconst, v.Type)
+ v0.AuxInt = log2(c - 8)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ v.AddArg(x)
+ return true
+ }
+ // match: (MULQconst [c] x)
+ // cond: c%3 == 0 && isPowerOfTwo(c/3)
+ // result: (SHLQconst [log2(c/3)] (LEAQ2 <v.Type> x x))
+ for {
+ c := v.AuxInt
+ x := v.Args[0]
+ if !(c%3 == 0 && isPowerOfTwo(c/3)) {
+ break
+ }
+ v.reset(OpAMD64SHLQconst)
+ v.AuxInt = log2(c / 3)
+ v0 := b.NewValue0(v.Line, OpAMD64LEAQ2, v.Type)
+ v0.AddArg(x)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
+ // match: (MULQconst [c] x)
+ // cond: c%5 == 0 && isPowerOfTwo(c/5)
+ // result: (SHLQconst [log2(c/5)] (LEAQ4 <v.Type> x x))
+ for {
+ c := v.AuxInt
+ x := v.Args[0]
+ if !(c%5 == 0 && isPowerOfTwo(c/5)) {
+ break
+ }
+ v.reset(OpAMD64SHLQconst)
+ v.AuxInt = log2(c / 5)
+ v0 := b.NewValue0(v.Line, OpAMD64LEAQ4, v.Type)
+ v0.AddArg(x)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
+ // match: (MULQconst [c] x)
+ // cond: c%9 == 0 && isPowerOfTwo(c/9)
+ // result: (SHLQconst [log2(c/9)] (LEAQ8 <v.Type> x x))
+ for {
+ c := v.AuxInt
+ x := v.Args[0]
+ if !(c%9 == 0 && isPowerOfTwo(c/9)) {
+ break
+ }
+ v.reset(OpAMD64SHLQconst)
+ v.AuxInt = log2(c / 9)
+ v0 := b.NewValue0(v.Line, OpAMD64LEAQ8, v.Type)
+ v0.AddArg(x)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ return true
+ }
// match: (MULQconst [c] (MOVQconst [d]))
// cond:
// result: (MOVQconst [c*d])
--- /dev/null
+// runoutput
+
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Generate a test of strength reduction for multiplications
+// with constants. Especially useful for amd64/386.
+
+package main
+
+import "fmt"
+
+func testMul(fact, bits int) string {
+ n := fmt.Sprintf("testMul_%d_%d", fact, bits)
+ fmt.Printf("func %s(s int%d) {\n", n, bits)
+
+ want := 0
+ for i := 0; i < 200; i++ {
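+ // Each iteration emits one check of the form
+ //   if want, got := int<bits>(<want>), s*<i>; want != got { ... }
+ // where <want> is the precomputed product fact*i. The doubled %%d verbs
+ // survive into the emitted Printf so the generated test can report s and got.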
+ fmt.Printf(` if want, got := int%d(%d), s*%d; want != got {
+ failed = true
+ fmt.Printf("got %d * %%d == %%d, wanted %d\n", s, got)
+ }
+`, bits, want, i, i, want)
+ want += fact
+ }
+
+ fmt.Printf("}\n")
+ return fmt.Sprintf("%s(%d)", n, fact)
+}
+
+func main() {
+ fmt.Printf("package main\n")
+ fmt.Printf("import \"fmt\"\n")
+ fmt.Printf("var failed = false\n")
+
+ f1 := testMul(17, 32)
+ f2 := testMul(131, 64)
+
+ fmt.Printf("func main() {\n")
+ fmt.Println(f1)
+ fmt.Println(f2)
+ fmt.Printf("if failed {\n panic(\"multiplication failed\")\n}\n")
+ fmt.Printf("}\n")
+}
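+
+// The emitted program has roughly this shape (a sketch; each function
+// contains 200 checks, one per multiplier 0..199):
+//
+//	package main
+//	import "fmt"
+//	var failed = false
+//	func testMul_17_32(s int32) {
+//		if want, got := int32(0), s*0; want != got { ... }
+//		if want, got := int32(17), s*1; want != got { ... }
+//		// ...
+//	}
+//	func testMul_131_64(s int64) { ... }
+//	func main() {
+//		testMul_17_32(17)
+//		testMul_131_64(131)
+//		if failed {
+//			panic("multiplication failed")
+//		}
+//	}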