]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: combine x*n - y*n into (x-y)*n
authorCholerae Hu <choleraehyq@gmail.com>
Thu, 17 Aug 2017 07:06:42 +0000 (15:06 +0800)
committerKeith Randall <khr@golang.org>
Sun, 3 Sep 2017 14:29:38 +0000 (14:29 +0000)
Do the similar thing to CL 55143 to reduce IMUL.

Change-Id: I1bd38f618058e3cd74fac181f003610ea13f2294
Reviewed-on: https://go-review.googlesource.com/56252
Run-TryBot: Emmanuel Odeke <emm.odeke@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/gc/asm_test.go
src/cmd/compile/internal/ssa/gen/generic.rules
src/cmd/compile/internal/ssa/rewritegeneric.go
test/mergemul.go

index 3bf8dcd42d9fca0452c970b5e86672a49b6fbfb8..0445caba66973241a6fc2f2d52073273e4bd89bd 100644 (file)
@@ -831,6 +831,20 @@ var linuxAMD64Tests = []*asmTest{
                }`,
                pos: []string{"\tADDQ\t[$]19", "\tIMULQ"}, // (a+19)*n
        },
+       {
+               `
+               func mul4(n int) int {
+                       return 23*n - 9*n
+               }`,
+               []string{"\tIMULQ\t[$]14"}, // 14*n
+       },
+       {
+               `
+               func mul5(a, n int) int {
+                       return a*n - 19*n
+               }`,
+               []string{"\tADDQ\t[$]-19", "\tIMULQ"}, // (a-19)*n
+       },
 
        // see issue 19595.
        // We want to merge load+op in f58, but not in f59.
@@ -1150,6 +1164,20 @@ var linux386Tests = []*asmTest{
                `,
                pos: []string{"TEXT\t.*, [$]0-4"},
        },
+       {
+               `
+               func mul3(n int) int {
+                       return 23*n - 9*n
+               }`,
+               []string{"\tIMULL\t[$]14"}, // 14*n
+       },
+       {
+               `
+               func mul4(a, n int) int {
+                       return n*a - a*19
+               }`,
+               []string{"\tADDL\t[$]-19", "\tIMULL"}, // (n-19)*a
+       },
 }
 
 var linuxS390XTests = []*asmTest{
index 1faf6b3857f2c9b6de3cf2c82cabddba15cca372..dd4018abe25125d06fbe7f39ee6d0fbd35a18e07 100644 (file)
 (Add16 <t> (Mul16 x y) (Mul16 x z)) -> (Mul16 x (Add16 <t> y z))
 (Add8  <t> (Mul8  x y) (Mul8  x z)) -> (Mul8  x (Add8  <t> y z))
 
+// Rewrite x*y - x*z  to  x*(y-z)
+(Sub64 <t> (Mul64 x y) (Mul64 x z)) -> (Mul64 x (Sub64 <t> y z))
+(Sub32 <t> (Mul32 x y) (Mul32 x z)) -> (Mul32 x (Sub32 <t> y z))
+(Sub16 <t> (Mul16 x y) (Mul16 x z)) -> (Mul16 x (Sub16 <t> y z))
+(Sub8  <t> (Mul8  x y) (Mul8  x z)) -> (Mul8  x (Sub8  <t> y z))
+
 // rewrite shifts of 8/16/32 bit consts into 64 bit consts to reduce
 // the number of the other rewrite rules for const shifts
 (Lsh64x32  <t> x (Const32 [c])) -> (Lsh64x64  x (Const64 <t> [int64(uint32(c))]))
index fdd4c1e1677d180145e9fea5aadf9797f361441d..83108442871cdc5eee1e3c8c238e54a16fcc9e36 100644 (file)
@@ -23796,6 +23796,126 @@ func rewriteValuegeneric_OpSub16_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Sub16 <t> (Mul16 x y) (Mul16 x z))
+       // cond:
+       // result: (Mul16 x (Sub16 <t> y z))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul16 {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul16 {
+                       break
+               }
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
+                       break
+               }
+               z := v_1.Args[1]
+               v.reset(OpMul16)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpSub16, t)
+               v0.AddArg(y)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Sub16 <t> (Mul16 y x) (Mul16 x z))
+       // cond:
+       // result: (Mul16 x (Sub16 <t> y z))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul16 {
+                       break
+               }
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul16 {
+                       break
+               }
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
+                       break
+               }
+               z := v_1.Args[1]
+               v.reset(OpMul16)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpSub16, t)
+               v0.AddArg(y)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Sub16 <t> (Mul16 x y) (Mul16 z x))
+       // cond:
+       // result: (Mul16 x (Sub16 <t> y z))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul16 {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul16 {
+                       break
+               }
+               _ = v_1.Args[1]
+               z := v_1.Args[0]
+               if x != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpMul16)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpSub16, t)
+               v0.AddArg(y)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Sub16 <t> (Mul16 y x) (Mul16 z x))
+       // cond:
+       // result: (Mul16 x (Sub16 <t> y z))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul16 {
+                       break
+               }
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul16 {
+                       break
+               }
+               _ = v_1.Args[1]
+               z := v_1.Args[0]
+               if x != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpMul16)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpSub16, t)
+               v0.AddArg(y)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
        // match: (Sub16 x x)
        // cond:
        // result: (Const16 [0])
@@ -23869,6 +23989,11 @@ func rewriteValuegeneric_OpSub16_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       return false
+}
+func rewriteValuegeneric_OpSub16_10(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (Sub16 (Add16 y x) y)
        // cond:
        // result: x
@@ -23977,11 +24102,6 @@ func rewriteValuegeneric_OpSub16_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpSub16_10(v *Value) bool {
-       b := v.Block
-       _ = b
        // match: (Sub16 (Const16 <t> [c]) (Sub16 (Const16 <t> [d]) x))
        // cond:
        // result: (Add16 (Const16 <t> [int64(int16(c-d))]) x)
@@ -24060,6 +24180,126 @@ func rewriteValuegeneric_OpSub32_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Sub32 <t> (Mul32 x y) (Mul32 x z))
+       // cond:
+       // result: (Mul32 x (Sub32 <t> y z))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul32 {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul32 {
+                       break
+               }
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
+                       break
+               }
+               z := v_1.Args[1]
+               v.reset(OpMul32)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpSub32, t)
+               v0.AddArg(y)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Sub32 <t> (Mul32 y x) (Mul32 x z))
+       // cond:
+       // result: (Mul32 x (Sub32 <t> y z))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul32 {
+                       break
+               }
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul32 {
+                       break
+               }
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
+                       break
+               }
+               z := v_1.Args[1]
+               v.reset(OpMul32)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpSub32, t)
+               v0.AddArg(y)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Sub32 <t> (Mul32 x y) (Mul32 z x))
+       // cond:
+       // result: (Mul32 x (Sub32 <t> y z))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul32 {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul32 {
+                       break
+               }
+               _ = v_1.Args[1]
+               z := v_1.Args[0]
+               if x != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpMul32)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpSub32, t)
+               v0.AddArg(y)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Sub32 <t> (Mul32 y x) (Mul32 z x))
+       // cond:
+       // result: (Mul32 x (Sub32 <t> y z))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul32 {
+                       break
+               }
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul32 {
+                       break
+               }
+               _ = v_1.Args[1]
+               z := v_1.Args[0]
+               if x != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpMul32)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpSub32, t)
+               v0.AddArg(y)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
        // match: (Sub32 x x)
        // cond:
        // result: (Const32 [0])
@@ -24133,6 +24373,11 @@ func rewriteValuegeneric_OpSub32_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       return false
+}
+func rewriteValuegeneric_OpSub32_10(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (Sub32 (Add32 y x) y)
        // cond:
        // result: x
@@ -24241,11 +24486,6 @@ func rewriteValuegeneric_OpSub32_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpSub32_10(v *Value) bool {
-       b := v.Block
-       _ = b
        // match: (Sub32 (Const32 <t> [c]) (Sub32 (Const32 <t> [d]) x))
        // cond:
        // result: (Add32 (Const32 <t> [int64(int32(c-d))]) x)
@@ -24364,6 +24604,126 @@ func rewriteValuegeneric_OpSub64_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Sub64 <t> (Mul64 x y) (Mul64 x z))
+       // cond:
+       // result: (Mul64 x (Sub64 <t> y z))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul64 {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul64 {
+                       break
+               }
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
+                       break
+               }
+               z := v_1.Args[1]
+               v.reset(OpMul64)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpSub64, t)
+               v0.AddArg(y)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Sub64 <t> (Mul64 y x) (Mul64 x z))
+       // cond:
+       // result: (Mul64 x (Sub64 <t> y z))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul64 {
+                       break
+               }
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul64 {
+                       break
+               }
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
+                       break
+               }
+               z := v_1.Args[1]
+               v.reset(OpMul64)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpSub64, t)
+               v0.AddArg(y)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Sub64 <t> (Mul64 x y) (Mul64 z x))
+       // cond:
+       // result: (Mul64 x (Sub64 <t> y z))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul64 {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul64 {
+                       break
+               }
+               _ = v_1.Args[1]
+               z := v_1.Args[0]
+               if x != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpMul64)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpSub64, t)
+               v0.AddArg(y)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Sub64 <t> (Mul64 y x) (Mul64 z x))
+       // cond:
+       // result: (Mul64 x (Sub64 <t> y z))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul64 {
+                       break
+               }
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul64 {
+                       break
+               }
+               _ = v_1.Args[1]
+               z := v_1.Args[0]
+               if x != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpMul64)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpSub64, t)
+               v0.AddArg(y)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
        // match: (Sub64 x x)
        // cond:
        // result: (Const64 [0])
@@ -24437,6 +24797,11 @@ func rewriteValuegeneric_OpSub64_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       return false
+}
+func rewriteValuegeneric_OpSub64_10(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (Sub64 (Add64 y x) y)
        // cond:
        // result: x
@@ -24545,11 +24910,6 @@ func rewriteValuegeneric_OpSub64_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpSub64_10(v *Value) bool {
-       b := v.Block
-       _ = b
        // match: (Sub64 (Const64 <t> [c]) (Sub64 (Const64 <t> [d]) x))
        // cond:
        // result: (Add64 (Const64 <t> [c-d]) x)
@@ -24668,6 +25028,126 @@ func rewriteValuegeneric_OpSub8_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (Sub8 <t> (Mul8 x y) (Mul8 x z))
+       // cond:
+       // result: (Mul8  x (Sub8  <t> y z))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul8 {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul8 {
+                       break
+               }
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
+                       break
+               }
+               z := v_1.Args[1]
+               v.reset(OpMul8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpSub8, t)
+               v0.AddArg(y)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Sub8 <t> (Mul8 y x) (Mul8 x z))
+       // cond:
+       // result: (Mul8  x (Sub8  <t> y z))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul8 {
+                       break
+               }
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul8 {
+                       break
+               }
+               _ = v_1.Args[1]
+               if x != v_1.Args[0] {
+                       break
+               }
+               z := v_1.Args[1]
+               v.reset(OpMul8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpSub8, t)
+               v0.AddArg(y)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Sub8 <t> (Mul8 x y) (Mul8 z x))
+       // cond:
+       // result: (Mul8  x (Sub8  <t> y z))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul8 {
+                       break
+               }
+               _ = v_0.Args[1]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul8 {
+                       break
+               }
+               _ = v_1.Args[1]
+               z := v_1.Args[0]
+               if x != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpMul8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpSub8, t)
+               v0.AddArg(y)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (Sub8 <t> (Mul8 y x) (Mul8 z x))
+       // cond:
+       // result: (Mul8  x (Sub8  <t> y z))
+       for {
+               t := v.Type
+               _ = v.Args[1]
+               v_0 := v.Args[0]
+               if v_0.Op != OpMul8 {
+                       break
+               }
+               _ = v_0.Args[1]
+               y := v_0.Args[0]
+               x := v_0.Args[1]
+               v_1 := v.Args[1]
+               if v_1.Op != OpMul8 {
+                       break
+               }
+               _ = v_1.Args[1]
+               z := v_1.Args[0]
+               if x != v_1.Args[1] {
+                       break
+               }
+               v.reset(OpMul8)
+               v.AddArg(x)
+               v0 := b.NewValue0(v.Pos, OpSub8, t)
+               v0.AddArg(y)
+               v0.AddArg(z)
+               v.AddArg(v0)
+               return true
+       }
        // match: (Sub8 x x)
        // cond:
        // result: (Const8  [0])
@@ -24741,6 +25221,11 @@ func rewriteValuegeneric_OpSub8_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       return false
+}
+func rewriteValuegeneric_OpSub8_10(v *Value) bool {
+       b := v.Block
+       _ = b
        // match: (Sub8 (Add8 y x) y)
        // cond:
        // result: x
@@ -24849,11 +25334,6 @@ func rewriteValuegeneric_OpSub8_0(v *Value) bool {
                v.AddArg(x)
                return true
        }
-       return false
-}
-func rewriteValuegeneric_OpSub8_10(v *Value) bool {
-       b := v.Block
-       _ = b
        // match: (Sub8 (Const8 <t> [c]) (Sub8 (Const8 <t> [d]) x))
        // cond:
        // result: (Add8  (Const8  <t> [int64(int8(c-d))]) x)
index 86fbe676cb8173e9876aecc18cbd63cb1dc0d05a..a23115b612392acd95a38c8b390b297abac031e8 100644 (file)
@@ -24,7 +24,7 @@ import "fmt"
 //   if a8 != b8 {
 //        // print error msg and panic
 //   }
-func makeMergeTest(m1, m2, k int, size string) string {
+func makeMergeAddTest(m1, m2, k int, size string) string {
 
        model := "    a" + size + ", b" + size
        model += fmt.Sprintf(" = %%d*n%s + %%d*(n%s+%%d), (%%d+%%d)*n%s + (%%d*%%d)", size, size, size)
@@ -32,7 +32,39 @@ func makeMergeTest(m1, m2, k int, size string) string {
        test := fmt.Sprintf(model, m1, m2, k, m1, m2, m2, k)
        test += fmt.Sprintf(`
     if a%s != b%s {
-        fmt.Printf("MergeTest(%d, %d, %d, %s) failed\n")
+        fmt.Printf("MergeAddTest(%d, %d, %d, %s) failed\n")
+        fmt.Printf("%%d != %%d\n", a%s, b%s)
+        panic("FAIL")
+    }
+`, size, size, m1, m2, k, size, size, size)
+       return test + "\n"
+}
+
+// Check that expressions like (c*n - d*(n+k)) get correctly merged by
+// the compiler into (c-d)*n - d*k (with c-d and d*k computed at
+// compile time).
+//
+// The merging is performed by a combination of the multiplication
+// merge rules
+//  (c*n - d*n) -> (c-d)*n
+// and the distributive multiplication rules
+//  c * (d-x)  ->  c*d - c*x
+
+// Generate a MergeTest that looks like this:
+//
+//   a8, b8 = m1*n8 - m2*(n8+k), (m1-m2)*n8 - m2*k
+//   if a8 != b8 {
+//        // print error msg and panic
+//   }
+func makeMergeSubTest(m1, m2, k int, size string) string {
+
+       model := "    a" + size + ", b" + size
+       model += fmt.Sprintf(" = %%d*n%s - %%d*(n%s+%%d), (%%d-%%d)*n%s - (%%d*%%d)", size, size, size)
+
+       test := fmt.Sprintf(model, m1, m2, k, m1, m2, m2, k)
+       test += fmt.Sprintf(`
+    if a%s != b%s {
+        fmt.Printf("MergeSubTest(%d, %d, %d, %s) failed\n")
         fmt.Printf("%%d != %%d\n", a%s, b%s)
         panic("FAIL")
     }
@@ -42,10 +74,14 @@ func makeMergeTest(m1, m2, k int, size string) string {
 
 func makeAllSizes(m1, m2, k int) string {
        var tests string
-       tests += makeMergeTest(m1, m2, k, "8")
-       tests += makeMergeTest(m1, m2, k, "16")
-       tests += makeMergeTest(m1, m2, k, "32")
-       tests += makeMergeTest(m1, m2, k, "64")
+       tests += makeMergeAddTest(m1, m2, k, "8")
+       tests += makeMergeAddTest(m1, m2, k, "16")
+       tests += makeMergeAddTest(m1, m2, k, "32")
+       tests += makeMergeAddTest(m1, m2, k, "64")
+       tests += makeMergeSubTest(m1, m2, k, "8")
+       tests += makeMergeSubTest(m1, m2, k, "16")
+       tests += makeMergeSubTest(m1, m2, k, "32")
+       tests += makeMergeSubTest(m1, m2, k, "64")
        tests += "\n"
        return tests
 }