cmd/compile: clean up and optimize s390x multiplication rules

author Michael Munday <mike.munday@ibm.com>

Thu, 4 Jun 2020 17:55:01 +0000 (10:55 -0700)

committer Michael Munday <mike.munday@ibm.com>

Tue, 18 Aug 2020 15:39:44 +0000 (15:39 +0000)
author Michael Munday <mike.munday@ibm.com>
Thu, 4 Jun 2020 17:55:01 +0000 (10:55 -0700)
committer Michael Munday <mike.munday@ibm.com>
Tue, 18 Aug 2020 15:39:44 +0000 (15:39 +0000)
diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules

index d3234c1a00830a3f58b321e283313f62247ab2ff..5e4c436ca11184a8641568237b9aa1fce40fd7c3 100644 (file)
--- a/src/cmd/compile/internal/ssa/gen/S390X.rules
+++ b/src/cmd/compile/internal/ssa/gen/S390X.rules
@@ -716,20 +716,40 @@
  (ANDWconst [0xFF] x) => (MOVBZreg x)
  (ANDWconst [0xFFFF] x) => (MOVHZreg x)
  
-// strength reduction
-(MULLDconst [-1] x) => (NEG x)
-(MULLDconst [0] _) => (MOVDconst [0])
-(MULLDconst [1] x) => x
-(MULLDconst [c] x) && isPowerOfTwo(c) -> (SLDconst [log2(c)] x)
-(MULLDconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUB (SLDconst <v.Type> [log2(c+1)] x) x)
-(MULLDconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (ADD (SLDconst <v.Type> [log2(c-1)] x) x)
-
-(MULLWconst [-1] x) => (NEGW x)
-(MULLWconst [0] _) => (MOVDconst [0])
-(MULLWconst [1] x) => x
-(MULLWconst [c] x) && isPowerOfTwo(c) -> (SLWconst [log2(c)] x)
-(MULLWconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUBW (SLWconst <v.Type> [log2(c+1)] x) x)
-(MULLWconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (ADDW (SLWconst <v.Type> [log2(c-1)] x) x)
+// Strength reduce multiplication to the sum (or difference) of two powers of two.
+//
+// Examples:
+//     5x -> 4x + 1x
+//    10x -> 8x + 2x
+//   120x -> 128x - 8x
+//  -120x -> 8x - 128x
+//
+// We know that the rightmost bit of any positive value, once isolated, must either
+// be a power of 2 (because it is a single bit) or 0 (if the original value is 0).
+// In all of these rules we use a rightmost bit calculation to determine one operand
+// for the addition or subtraction. We then just need to calculate if the other
+// operand is a valid power of 2 before we can match the rule.
+//
+// Notes:
+//   - the generic rules have already matched single powers of two so we ignore them here
+//   - isPowerOfTwo32 asserts that its argument is greater than 0
+//   - c&(c-1) = clear rightmost bit
+//   - c&^(c-1) = isolate rightmost bit
+
+// c = 2ˣ + 2ʸ => c - 2ˣ = 2ʸ
+(MULL(D|W)const <t> x [c]) && isPowerOfTwo32(c&(c-1))
+  => ((ADD|ADDW) (SL(D|W)const <t> x [int8(log32(c&(c-1)))])
+                 (SL(D|W)const <t> x [int8(log32(c&^(c-1)))]))
+
+// c = 2ʸ - 2ˣ => c + 2ˣ = 2ʸ
+(MULL(D|W)const <t> x [c]) && isPowerOfTwo32(c+(c&^(c-1)))
+  => ((SUB|SUBW) (SL(D|W)const <t> x [int8(log32(c+(c&^(c-1))))])
+                 (SL(D|W)const <t> x [int8(log32(c&^(c-1)))]))
+
+// c = 2ˣ - 2ʸ => -c + 2ˣ = 2ʸ
+(MULL(D|W)const <t> x [c]) && isPowerOfTwo32(-c+(-c&^(-c-1)))
+  => ((SUB|SUBW) (SL(D|W)const <t> x [int8(log32(-c&^(-c-1)))])
+                 (SL(D|W)const <t> x [int8(log32(-c+(-c&^(-c-1))))]))
  
  // Fold ADD into MOVDaddr. Odd offsets from SB shouldn't be folded (LARL can't handle them).
  (ADDconst [c] (MOVDaddr [d] {s} x:(SB))) && ((c+d)&1 == 0) && is32Bit(c+d) -> (MOVDaddr [c+d] {s} x)
@@ -1133,6 +1153,9 @@
  (XORconst [0] x)                  => x
  (XORWconst [c] x) && int32(c)==0   => x
  
+// Shifts by zero (may be inserted during multiplication strength reduction).
+((SLD|SLW|SRD|SRW|SRAD|SRAW)const x [0]) => x
+
  // Convert constant subtracts to constant adds.
  (SUBconst [c] x) && c != -(1<<31) => (ADDconst [-c] x)
  (SUBWconst [c] x) -> (ADDWconst [int64(int32(-c))] x)
diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go

index dc9b143562825b2384ad4eb5952b0b009194ce90..536f8db3205d70f03cfcc765ebdbb12fad2d760e 100644 (file)
--- a/src/cmd/compile/internal/ssa/rewriteS390X.go
+++ b/src/cmd/compile/internal/ssa/rewriteS390X.go
@@ -732,8 +732,12 @@ func rewriteValueS390X(v *Value) bool {
                 return rewriteValueS390X_OpS390XRLLG(v)
         case OpS390XSLD:
                 return rewriteValueS390X_OpS390XSLD(v)
+       case OpS390XSLDconst:
+               return rewriteValueS390X_OpS390XSLDconst(v)
         case OpS390XSLW:
                 return rewriteValueS390X_OpS390XSLW(v)
+       case OpS390XSLWconst:
+               return rewriteValueS390X_OpS390XSLWconst(v)
         case OpS390XSRAD:
                 return rewriteValueS390X_OpS390XSRAD(v)
         case OpS390XSRADconst:
@@ -748,6 +752,8 @@ func rewriteValueS390X(v *Value) bool {
                 return rewriteValueS390X_OpS390XSRDconst(v)
         case OpS390XSRW:
                 return rewriteValueS390X_OpS390XSRW(v)
+       case OpS390XSRWconst:
+               return rewriteValueS390X_OpS390XSRWconst(v)
         case OpS390XSTM2:
                 return rewriteValueS390X_OpS390XSTM2(v)
         case OpS390XSTMG2:
@@ -13853,81 +13859,64 @@ func rewriteValueS390X_OpS390XMULLD(v *Value) bool {
  func rewriteValueS390X_OpS390XMULLDconst(v *Value) bool {
         v_0 := v.Args[0]
         b := v.Block
-       // match: (MULLDconst [-1] x)
-       // result: (NEG x)
+       // match: (MULLDconst <t> x [c])
+       // cond: isPowerOfTwo32(c&(c-1))
+       // result: (ADD (SLDconst <t> x [int8(log32(c&(c-1)))]) (SLDconst <t> x [int8(log32(c&^(c-1)))]))
         for {
-               if auxIntToInt32(v.AuxInt) != -1 {
-                       break
-               }
-               x := v_0
-               v.reset(OpS390XNEG)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLDconst [0] _)
-       // result: (MOVDconst [0])
-       for {
-               if auxIntToInt32(v.AuxInt) != 0 {
-                       break
-               }
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = int64ToAuxInt(0)
-               return true
-       }
-       // match: (MULLDconst [1] x)
-       // result: x
-       for {
-               if auxIntToInt32(v.AuxInt) != 1 {
-                       break
-               }
-               x := v_0
-               v.copyOf(x)
-               return true
-       }
-       // match: (MULLDconst [c] x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLDconst [log2(c)] x)
-       for {
-               c := v.AuxInt
+               t := v.Type
+               c := auxIntToInt32(v.AuxInt)
                 x := v_0
-               if !(isPowerOfTwo(c)) {
+               if !(isPowerOfTwo32(c & (c - 1))) {
                         break
                 }
-               v.reset(OpS390XSLDconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
+               v.reset(OpS390XADD)
+               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, t)
+               v0.AuxInt = int8ToAuxInt(int8(log32(c & (c - 1))))
+               v0.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, t)
+               v1.AuxInt = int8ToAuxInt(int8(log32(c &^ (c - 1))))
+               v1.AddArg(x)
+               v.AddArg2(v0, v1)
                 return true
         }
-       // match: (MULLDconst [c] x)
-       // cond: isPowerOfTwo(c+1) && c >= 15
-       // result: (SUB (SLDconst <v.Type> [log2(c+1)] x) x)
+       // match: (MULLDconst <t> x [c])
+       // cond: isPowerOfTwo32(c+(c&^(c-1)))
+       // result: (SUB (SLDconst <t> x [int8(log32(c+(c&^(c-1))))]) (SLDconst <t> x [int8(log32(c&^(c-1)))]))
         for {
-               c := v.AuxInt
+               t := v.Type
+               c := auxIntToInt32(v.AuxInt)
                 x := v_0
-               if !(isPowerOfTwo(c+1) && c >= 15) {
+               if !(isPowerOfTwo32(c + (c &^ (c - 1)))) {
                         break
                 }
                 v.reset(OpS390XSUB)
-               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
-               v0.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, t)
+               v0.AuxInt = int8ToAuxInt(int8(log32(c + (c &^ (c - 1)))))
                 v0.AddArg(x)
-               v.AddArg2(v0, x)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, t)
+               v1.AuxInt = int8ToAuxInt(int8(log32(c &^ (c - 1))))
+               v1.AddArg(x)
+               v.AddArg2(v0, v1)
                 return true
         }
-       // match: (MULLDconst [c] x)
-       // cond: isPowerOfTwo(c-1) && c >= 17
-       // result: (ADD (SLDconst <v.Type> [log2(c-1)] x) x)
+       // match: (MULLDconst <t> x [c])
+       // cond: isPowerOfTwo32(-c+(-c&^(-c-1)))
+       // result: (SUB (SLDconst <t> x [int8(log32(-c&^(-c-1)))]) (SLDconst <t> x [int8(log32(-c+(-c&^(-c-1))))]))
         for {
-               c := v.AuxInt
+               t := v.Type
+               c := auxIntToInt32(v.AuxInt)
                 x := v_0
-               if !(isPowerOfTwo(c-1) && c >= 17) {
+               if !(isPowerOfTwo32(-c + (-c &^ (-c - 1)))) {
                         break
                 }
-               v.reset(OpS390XADD)
-               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
-               v0.AuxInt = log2(c - 1)
+               v.reset(OpS390XSUB)
+               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, t)
+               v0.AuxInt = int8ToAuxInt(int8(log32(-c &^ (-c - 1))))
                 v0.AddArg(x)
-               v.AddArg2(v0, x)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, t)
+               v1.AuxInt = int8ToAuxInt(int8(log32(-c + (-c &^ (-c - 1)))))
+               v1.AddArg(x)
+               v.AddArg2(v0, v1)
                 return true
         }
         // match: (MULLDconst [c] (MOVDconst [d]))
@@ -14097,81 +14086,64 @@ func rewriteValueS390X_OpS390XMULLW(v *Value) bool {
  func rewriteValueS390X_OpS390XMULLWconst(v *Value) bool {
         v_0 := v.Args[0]
         b := v.Block
-       // match: (MULLWconst [-1] x)
-       // result: (NEGW x)
-       for {
-               if auxIntToInt32(v.AuxInt) != -1 {
-                       break
-               }
-               x := v_0
-               v.reset(OpS390XNEGW)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLWconst [0] _)
-       // result: (MOVDconst [0])
-       for {
-               if auxIntToInt32(v.AuxInt) != 0 {
-                       break
-               }
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = int64ToAuxInt(0)
-               return true
-       }
-       // match: (MULLWconst [1] x)
-       // result: x
+       // match: (MULLWconst <t> x [c])
+       // cond: isPowerOfTwo32(c&(c-1))
+       // result: (ADDW (SLWconst <t> x [int8(log32(c&(c-1)))]) (SLWconst <t> x [int8(log32(c&^(c-1)))]))
         for {
-               if auxIntToInt32(v.AuxInt) != 1 {
-                       break
-               }
-               x := v_0
-               v.copyOf(x)
-               return true
-       }
-       // match: (MULLWconst [c] x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLWconst [log2(c)] x)
-       for {
-               c := v.AuxInt
+               t := v.Type
+               c := auxIntToInt32(v.AuxInt)
                 x := v_0
-               if !(isPowerOfTwo(c)) {
+               if !(isPowerOfTwo32(c & (c - 1))) {
                         break
                 }
-               v.reset(OpS390XSLWconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
+               v.reset(OpS390XADDW)
+               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, t)
+               v0.AuxInt = int8ToAuxInt(int8(log32(c & (c - 1))))
+               v0.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, t)
+               v1.AuxInt = int8ToAuxInt(int8(log32(c &^ (c - 1))))
+               v1.AddArg(x)
+               v.AddArg2(v0, v1)
                 return true
         }
-       // match: (MULLWconst [c] x)
-       // cond: isPowerOfTwo(c+1) && c >= 15
-       // result: (SUBW (SLWconst <v.Type> [log2(c+1)] x) x)
+       // match: (MULLWconst <t> x [c])
+       // cond: isPowerOfTwo32(c+(c&^(c-1)))
+       // result: (SUBW (SLWconst <t> x [int8(log32(c+(c&^(c-1))))]) (SLWconst <t> x [int8(log32(c&^(c-1)))]))
         for {
-               c := v.AuxInt
+               t := v.Type
+               c := auxIntToInt32(v.AuxInt)
                 x := v_0
-               if !(isPowerOfTwo(c+1) && c >= 15) {
+               if !(isPowerOfTwo32(c + (c &^ (c - 1)))) {
                         break
                 }
                 v.reset(OpS390XSUBW)
-               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
-               v0.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, t)
+               v0.AuxInt = int8ToAuxInt(int8(log32(c + (c &^ (c - 1)))))
                 v0.AddArg(x)
-               v.AddArg2(v0, x)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, t)
+               v1.AuxInt = int8ToAuxInt(int8(log32(c &^ (c - 1))))
+               v1.AddArg(x)
+               v.AddArg2(v0, v1)
                 return true
         }
-       // match: (MULLWconst [c] x)
-       // cond: isPowerOfTwo(c-1) && c >= 17
-       // result: (ADDW (SLWconst <v.Type> [log2(c-1)] x) x)
+       // match: (MULLWconst <t> x [c])
+       // cond: isPowerOfTwo32(-c+(-c&^(-c-1)))
+       // result: (SUBW (SLWconst <t> x [int8(log32(-c&^(-c-1)))]) (SLWconst <t> x [int8(log32(-c+(-c&^(-c-1))))]))
         for {
-               c := v.AuxInt
+               t := v.Type
+               c := auxIntToInt32(v.AuxInt)
                 x := v_0
-               if !(isPowerOfTwo(c-1) && c >= 17) {
+               if !(isPowerOfTwo32(-c + (-c &^ (-c - 1)))) {
                         break
                 }
-               v.reset(OpS390XADDW)
-               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
-               v0.AuxInt = log2(c - 1)
+               v.reset(OpS390XSUBW)
+               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, t)
+               v0.AuxInt = int8ToAuxInt(int8(log32(-c &^ (-c - 1))))
                 v0.AddArg(x)
-               v.AddArg2(v0, x)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, t)
+               v1.AuxInt = int8ToAuxInt(int8(log32(-c + (-c &^ (-c - 1)))))
+               v1.AddArg(x)
+               v.AddArg2(v0, v1)
                 return true
         }
         // match: (MULLWconst [c] (MOVDconst [d]))
@@ -16826,6 +16798,20 @@ func rewriteValueS390X_OpS390XSLD(v *Value) bool {
         }
         return false
  }
+func rewriteValueS390X_OpS390XSLDconst(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (SLDconst x [0])
+       // result: x
+       for {
+               if auxIntToInt8(v.AuxInt) != 0 {
+                       break
+               }
+               x := v_0
+               v.copyOf(x)
+               return true
+       }
+       return false
+}
  func rewriteValueS390X_OpS390XSLW(v *Value) bool {
         v_1 := v.Args[1]
         v_0 := v.Args[0]
@@ -16960,6 +16946,20 @@ func rewriteValueS390X_OpS390XSLW(v *Value) bool {
         }
         return false
  }
+func rewriteValueS390X_OpS390XSLWconst(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (SLWconst x [0])
+       // result: x
+       for {
+               if auxIntToInt8(v.AuxInt) != 0 {
+                       break
+               }
+               x := v_0
+               v.copyOf(x)
+               return true
+       }
+       return false
+}
  func rewriteValueS390X_OpS390XSRAD(v *Value) bool {
         v_1 := v.Args[1]
         v_0 := v.Args[0]
@@ -17096,6 +17096,16 @@ func rewriteValueS390X_OpS390XSRAD(v *Value) bool {
  }
  func rewriteValueS390X_OpS390XSRADconst(v *Value) bool {
         v_0 := v.Args[0]
+       // match: (SRADconst x [0])
+       // result: x
+       for {
+               if auxIntToInt8(v.AuxInt) != 0 {
+                       break
+               }
+               x := v_0
+               v.copyOf(x)
+               return true
+       }
         // match: (SRADconst [c] (MOVDconst [d]))
         // result: (MOVDconst [d>>uint64(c)])
         for {
@@ -17246,6 +17256,16 @@ func rewriteValueS390X_OpS390XSRAW(v *Value) bool {
  }
  func rewriteValueS390X_OpS390XSRAWconst(v *Value) bool {
         v_0 := v.Args[0]
+       // match: (SRAWconst x [0])
+       // result: x
+       for {
+               if auxIntToInt8(v.AuxInt) != 0 {
+                       break
+               }
+               x := v_0
+               v.copyOf(x)
+               return true
+       }
         // match: (SRAWconst [c] (MOVDconst [d]))
         // result: (MOVDconst [int64(int32(d))>>uint64(c)])
         for {
@@ -17416,6 +17436,16 @@ func rewriteValueS390X_OpS390XSRDconst(v *Value) bool {
                 v.AddArg(v0)
                 return true
         }
+       // match: (SRDconst x [0])
+       // result: x
+       for {
+               if auxIntToInt8(v.AuxInt) != 0 {
+                       break
+               }
+               x := v_0
+               v.copyOf(x)
+               return true
+       }
         return false
  }
  func rewriteValueS390X_OpS390XSRW(v *Value) bool {
@@ -17552,6 +17582,20 @@ func rewriteValueS390X_OpS390XSRW(v *Value) bool {
         }
         return false
  }
+func rewriteValueS390X_OpS390XSRWconst(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (SRWconst x [0])
+       // result: x
+       for {
+               if auxIntToInt8(v.AuxInt) != 0 {
+                       break
+               }
+               x := v_0
+               v.copyOf(x)
+               return true
+       }
+       return false
+}
  func rewriteValueS390X_OpS390XSTM2(v *Value) bool {
         v_3 := v.Args[3]
         v_2 := v.Args[2]
diff --git a/src/cmd/compile/internal/test/mulconst_test.go b/src/cmd/compile/internal/test/mulconst_test.go

new file mode 100644 (file)

index 0000000..314cab3
--- /dev/null
+++ b/src/cmd/compile/internal/test/mulconst_test.go
@@ -0,0 +1,242 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import "testing"
+
+// Benchmark multiplication of an integer by various constants.
+//
+// The comment above each sub-benchmark provides an example of how the
+// target multiplication operation might be implemented using shift
+// (multiplication by a power of 2), addition and subtraction
+// operations. It is platform-dependent whether these transformations
+// are actually applied.
+
+var (
+       mulSinkI32 int32
+       mulSinkI64 int64
+       mulSinkU32 uint32
+       mulSinkU64 uint64
+)
+
+func BenchmarkMulconstI32(b *testing.B) {
+       // 3x = 2x + x
+       b.Run("3", func(b *testing.B) {
+               x := int32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 3
+               }
+               mulSinkI32 = x
+       })
+       // 5x = 4x + x
+       b.Run("5", func(b *testing.B) {
+               x := int32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 5
+               }
+               mulSinkI32 = x
+       })
+       // 12x = 8x + 4x
+       b.Run("12", func(b *testing.B) {
+               x := int32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 12
+               }
+               mulSinkI32 = x
+       })
+       // 120x = 128x - 8x
+       b.Run("120", func(b *testing.B) {
+               x := int32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 120
+               }
+               mulSinkI32 = x
+       })
+       // -120x = 8x - 120x
+       b.Run("-120", func(b *testing.B) {
+               x := int32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= -120
+               }
+               mulSinkI32 = x
+       })
+       // 65537x = 65536x + x
+       b.Run("65537", func(b *testing.B) {
+               x := int32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 65537
+               }
+               mulSinkI32 = x
+       })
+       // 65538x = 65536x + 2x
+       b.Run("65538", func(b *testing.B) {
+               x := int32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 65538
+               }
+               mulSinkI32 = x
+       })
+}
+
+func BenchmarkMulconstI64(b *testing.B) {
+       // 3x = 2x + x
+       b.Run("3", func(b *testing.B) {
+               x := int64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 3
+               }
+               mulSinkI64 = x
+       })
+       // 5x = 4x + x
+       b.Run("5", func(b *testing.B) {
+               x := int64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 5
+               }
+               mulSinkI64 = x
+       })
+       // 12x = 8x + 4x
+       b.Run("12", func(b *testing.B) {
+               x := int64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 12
+               }
+               mulSinkI64 = x
+       })
+       // 120x = 128x - 8x
+       b.Run("120", func(b *testing.B) {
+               x := int64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 120
+               }
+               mulSinkI64 = x
+       })
+       // -120x = 8x - 120x
+       b.Run("-120", func(b *testing.B) {
+               x := int64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= -120
+               }
+               mulSinkI64 = x
+       })
+       // 65537x = 65536x + x
+       b.Run("65537", func(b *testing.B) {
+               x := int64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 65537
+               }
+               mulSinkI64 = x
+       })
+       // 65538x = 65536x + 2x
+       b.Run("65538", func(b *testing.B) {
+               x := int64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 65538
+               }
+               mulSinkI64 = x
+       })
+}
+
+func BenchmarkMulconstU32(b *testing.B) {
+       // 3x = 2x + x
+       b.Run("3", func(b *testing.B) {
+               x := uint32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 3
+               }
+               mulSinkU32 = x
+       })
+       // 5x = 4x + x
+       b.Run("5", func(b *testing.B) {
+               x := uint32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 5
+               }
+               mulSinkU32 = x
+       })
+       // 12x = 8x + 4x
+       b.Run("12", func(b *testing.B) {
+               x := uint32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 12
+               }
+               mulSinkU32 = x
+       })
+       // 120x = 128x - 8x
+       b.Run("120", func(b *testing.B) {
+               x := uint32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 120
+               }
+               mulSinkU32 = x
+       })
+       // 65537x = 65536x + x
+       b.Run("65537", func(b *testing.B) {
+               x := uint32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 65537
+               }
+               mulSinkU32 = x
+       })
+       // 65538x = 65536x + 2x
+       b.Run("65538", func(b *testing.B) {
+               x := uint32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 65538
+               }
+               mulSinkU32 = x
+       })
+}
+
+func BenchmarkMulconstU64(b *testing.B) {
+       // 3x = 2x + x
+       b.Run("3", func(b *testing.B) {
+               x := uint64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 3
+               }
+               mulSinkU64 = x
+       })
+       // 5x = 4x + x
+       b.Run("5", func(b *testing.B) {
+               x := uint64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 5
+               }
+               mulSinkU64 = x
+       })
+       // 12x = 8x + 4x
+       b.Run("12", func(b *testing.B) {
+               x := uint64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 12
+               }
+               mulSinkU64 = x
+       })
+       // 120x = 128x - 8x
+       b.Run("120", func(b *testing.B) {
+               x := uint64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 120
+               }
+               mulSinkU64 = x
+       })
+       // 65537x = 65536x + x
+       b.Run("65537", func(b *testing.B) {
+               x := uint64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 65537
+               }
+               mulSinkU64 = x
+       })
+       // 65538x = 65536x + 2x
+       b.Run("65538", func(b *testing.B) {
+               x := uint64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 65538
+               }
+               mulSinkU64 = x
+       })
+}
diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go

index 8f25974376ba6277e78305c34ef9ca5b5a517130..9f30ec8ce42c2956931b4b4b7d51d913571faf41 100644 (file)
--- a/test/codegen/arithmetic.go
+++ b/test/codegen/arithmetic.go
@@ -71,9 +71,15 @@ func Mul_96(n int) int {
         // 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL`
         // arm64:`LSL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
         // arm:`SLL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
+       // s390x:`SLD\t[$]5`,`SLD\t[$]6`,-`MULLD`
         return n * 96
  }
  
+func Mul_n120(n int) int {
+       // s390x:`SLD\t[$]3`,`SLD\t[$]7`,-`MULLD`
+       return n * -120
+}
+
  func MulMemSrc(a []uint32, b []float32) {
         // 386:`IMULL\s4\([A-Z]+\),\s[A-Z]+`
         a[0] *= a[1]
author	Michael Munday <mike.munday@ibm.com>
	Thu, 4 Jun 2020 17:55:01 +0000 (10:55 -0700)
committer	Michael Munday <mike.munday@ibm.com>
	Tue, 18 Aug 2020 15:39:44 +0000 (15:39 +0000)
src/cmd/compile/internal/ssa/gen/S390X.rules		patch \| blob \| history
src/cmd/compile/internal/ssa/rewriteS390X.go		patch \| blob \| history
src/cmd/compile/internal/test/mulconst_test.go	[new file with mode: 0644]	patch \| blob
test/codegen/arithmetic.go		patch \| blob \| history