]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: clean up and optimize s390x multiplication rules
author: Michael Munday <mike.munday@ibm.com>
Thu, 4 Jun 2020 17:55:01 +0000 (10:55 -0700)
committer: Michael Munday <mike.munday@ibm.com>
Tue, 18 Aug 2020 15:39:44 +0000 (15:39 +0000)
Some of the existing optimizations aren't triggered because they
are handled by the generic rules so this CL removes them. Also
some constraints were copied without much thought from the amd64
rules and they don't make sense on s390x, so we remove those
constraints.

Finally, add a 'multiply by the sum of two powers of two'
optimization. This makes sense on s390x as shifts are low latency
and can also sometimes be optimized further (especially if we add
support for RISBG instructions).

name                   old time/op  new time/op  delta
IntMulByConst/3-8      1.70ns ±11%  1.10ns ± 5%  -35.26%  (p=0.000 n=10+10)
IntMulByConst/5-8      1.64ns ± 7%  1.10ns ± 4%  -32.94%  (p=0.000 n=10+9)
IntMulByConst/12-8     1.65ns ± 6%  1.20ns ± 4%  -27.16%  (p=0.000 n=10+9)
IntMulByConst/120-8    1.66ns ± 4%  1.22ns ±13%  -26.43%  (p=0.000 n=10+10)
IntMulByConst/-120-8   1.65ns ± 7%  1.19ns ± 4%  -28.06%  (p=0.000 n=9+10)
IntMulByConst/65537-8  0.86ns ± 9%  1.12ns ±12%  +30.41%  (p=0.000 n=10+10)
IntMulByConst/65538-8  1.65ns ± 5%  1.23ns ± 5%  -25.11%  (p=0.000 n=10+10)

Change-Id: Ib196e6bff1e97febfd266134d0a2b2a62897989f
Reviewed-on: https://go-review.googlesource.com/c/go/+/248937
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/ssa/gen/S390X.rules
src/cmd/compile/internal/ssa/rewriteS390X.go
src/cmd/compile/internal/test/mulconst_test.go [new file with mode: 0644]
test/codegen/arithmetic.go

index d3234c1a00830a3f58b321e283313f62247ab2ff..5e4c436ca11184a8641568237b9aa1fce40fd7c3 100644 (file)
 (ANDWconst [0xFF] x) => (MOVBZreg x)
 (ANDWconst [0xFFFF] x) => (MOVHZreg x)
 
-// strength reduction
-(MULLDconst [-1] x) => (NEG x)
-(MULLDconst [0] _) => (MOVDconst [0])
-(MULLDconst [1] x) => x
-(MULLDconst [c] x) && isPowerOfTwo(c) -> (SLDconst [log2(c)] x)
-(MULLDconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUB (SLDconst <v.Type> [log2(c+1)] x) x)
-(MULLDconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (ADD (SLDconst <v.Type> [log2(c-1)] x) x)
-
-(MULLWconst [-1] x) => (NEGW x)
-(MULLWconst [0] _) => (MOVDconst [0])
-(MULLWconst [1] x) => x
-(MULLWconst [c] x) && isPowerOfTwo(c) -> (SLWconst [log2(c)] x)
-(MULLWconst [c] x) && isPowerOfTwo(c+1) && c >= 15 -> (SUBW (SLWconst <v.Type> [log2(c+1)] x) x)
-(MULLWconst [c] x) && isPowerOfTwo(c-1) && c >= 17 -> (ADDW (SLWconst <v.Type> [log2(c-1)] x) x)
+// Strength reduce multiplication to the sum (or difference) of two powers of two.
+//
+// Examples:
+//     5x -> 4x + 1x
+//    10x -> 8x + 2x
+//   120x -> 128x - 8x
+//  -120x -> 8x - 128x
+//
+// We know that the rightmost set bit of any non-negative value, once isolated, must
+// either be a power of 2 (because it is a single bit) or 0 (if the original value is 0).
+// In all of these rules we use a rightmost bit calculation to determine one operand
+// for the addition or subtraction. We then just need to calculate if the other
+// operand is a valid power of 2 before we can match the rule.
+//
+// Notes:
+//   - the generic rules have already matched single powers of two so we ignore them here
+//   - isPowerOfTwo32 asserts that its argument is greater than 0
+//   - c&(c-1) = clear rightmost set bit
+//   - c&^(c-1) = isolate rightmost set bit
+
+// c = 2ˣ + 2ʸ => c - 2ˣ = 2ʸ
+(MULL(D|W)const <t> x [c]) && isPowerOfTwo32(c&(c-1))
+  => ((ADD|ADDW) (SL(D|W)const <t> x [int8(log32(c&(c-1)))])
+                 (SL(D|W)const <t> x [int8(log32(c&^(c-1)))]))
+
+// c = 2ʸ - 2ˣ => c + 2ˣ = 2ʸ
+(MULL(D|W)const <t> x [c]) && isPowerOfTwo32(c+(c&^(c-1)))
+  => ((SUB|SUBW) (SL(D|W)const <t> x [int8(log32(c+(c&^(c-1))))])
+                 (SL(D|W)const <t> x [int8(log32(c&^(c-1)))]))
+
+// c = 2ˣ - 2ʸ => -c + 2ˣ = 2ʸ
+(MULL(D|W)const <t> x [c]) && isPowerOfTwo32(-c+(-c&^(-c-1)))
+  => ((SUB|SUBW) (SL(D|W)const <t> x [int8(log32(-c&^(-c-1)))])
+                 (SL(D|W)const <t> x [int8(log32(-c+(-c&^(-c-1))))]))
 
 // Fold ADD into MOVDaddr. Odd offsets from SB shouldn't be folded (LARL can't handle them).
 (ADDconst [c] (MOVDaddr [d] {s} x:(SB))) && ((c+d)&1 == 0) && is32Bit(c+d) -> (MOVDaddr [c+d] {s} x)
 (XORconst [0] x)                  => x
 (XORWconst [c] x) && int32(c)==0   => x
 
+// Shifts by zero (may be inserted during multiplication strength reduction).
+((SLD|SLW|SRD|SRW|SRAD|SRAW)const x [0]) => x
+
 // Convert constant subtracts to constant adds.
 (SUBconst [c] x) && c != -(1<<31) => (ADDconst [-c] x)
 (SUBWconst [c] x) -> (ADDWconst [int64(int32(-c))] x)
index dc9b143562825b2384ad4eb5952b0b009194ce90..536f8db3205d70f03cfcc765ebdbb12fad2d760e 100644 (file)
@@ -732,8 +732,12 @@ func rewriteValueS390X(v *Value) bool {
                return rewriteValueS390X_OpS390XRLLG(v)
        case OpS390XSLD:
                return rewriteValueS390X_OpS390XSLD(v)
+       case OpS390XSLDconst:
+               return rewriteValueS390X_OpS390XSLDconst(v)
        case OpS390XSLW:
                return rewriteValueS390X_OpS390XSLW(v)
+       case OpS390XSLWconst:
+               return rewriteValueS390X_OpS390XSLWconst(v)
        case OpS390XSRAD:
                return rewriteValueS390X_OpS390XSRAD(v)
        case OpS390XSRADconst:
@@ -748,6 +752,8 @@ func rewriteValueS390X(v *Value) bool {
                return rewriteValueS390X_OpS390XSRDconst(v)
        case OpS390XSRW:
                return rewriteValueS390X_OpS390XSRW(v)
+       case OpS390XSRWconst:
+               return rewriteValueS390X_OpS390XSRWconst(v)
        case OpS390XSTM2:
                return rewriteValueS390X_OpS390XSTM2(v)
        case OpS390XSTMG2:
@@ -13853,81 +13859,64 @@ func rewriteValueS390X_OpS390XMULLD(v *Value) bool {
 func rewriteValueS390X_OpS390XMULLDconst(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (MULLDconst [-1] x)
-       // result: (NEG x)
+       // match: (MULLDconst <t> x [c])
+       // cond: isPowerOfTwo32(c&(c-1))
+       // result: (ADD (SLDconst <t> x [int8(log32(c&(c-1)))]) (SLDconst <t> x [int8(log32(c&^(c-1)))]))
        for {
-               if auxIntToInt32(v.AuxInt) != -1 {
-                       break
-               }
-               x := v_0
-               v.reset(OpS390XNEG)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLDconst [0] _)
-       // result: (MOVDconst [0])
-       for {
-               if auxIntToInt32(v.AuxInt) != 0 {
-                       break
-               }
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = int64ToAuxInt(0)
-               return true
-       }
-       // match: (MULLDconst [1] x)
-       // result: x
-       for {
-               if auxIntToInt32(v.AuxInt) != 1 {
-                       break
-               }
-               x := v_0
-               v.copyOf(x)
-               return true
-       }
-       // match: (MULLDconst [c] x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLDconst [log2(c)] x)
-       for {
-               c := v.AuxInt
+               t := v.Type
+               c := auxIntToInt32(v.AuxInt)
                x := v_0
-               if !(isPowerOfTwo(c)) {
+               if !(isPowerOfTwo32(c & (c - 1))) {
                        break
                }
-               v.reset(OpS390XSLDconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
+               v.reset(OpS390XADD)
+               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, t)
+               v0.AuxInt = int8ToAuxInt(int8(log32(c & (c - 1))))
+               v0.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, t)
+               v1.AuxInt = int8ToAuxInt(int8(log32(c &^ (c - 1))))
+               v1.AddArg(x)
+               v.AddArg2(v0, v1)
                return true
        }
-       // match: (MULLDconst [c] x)
-       // cond: isPowerOfTwo(c+1) && c >= 15
-       // result: (SUB (SLDconst <v.Type> [log2(c+1)] x) x)
+       // match: (MULLDconst <t> x [c])
+       // cond: isPowerOfTwo32(c+(c&^(c-1)))
+       // result: (SUB (SLDconst <t> x [int8(log32(c+(c&^(c-1))))]) (SLDconst <t> x [int8(log32(c&^(c-1)))]))
        for {
-               c := v.AuxInt
+               t := v.Type
+               c := auxIntToInt32(v.AuxInt)
                x := v_0
-               if !(isPowerOfTwo(c+1) && c >= 15) {
+               if !(isPowerOfTwo32(c + (c &^ (c - 1)))) {
                        break
                }
                v.reset(OpS390XSUB)
-               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
-               v0.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, t)
+               v0.AuxInt = int8ToAuxInt(int8(log32(c + (c &^ (c - 1)))))
                v0.AddArg(x)
-               v.AddArg2(v0, x)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, t)
+               v1.AuxInt = int8ToAuxInt(int8(log32(c &^ (c - 1))))
+               v1.AddArg(x)
+               v.AddArg2(v0, v1)
                return true
        }
-       // match: (MULLDconst [c] x)
-       // cond: isPowerOfTwo(c-1) && c >= 17
-       // result: (ADD (SLDconst <v.Type> [log2(c-1)] x) x)
+       // match: (MULLDconst <t> x [c])
+       // cond: isPowerOfTwo32(-c+(-c&^(-c-1)))
+       // result: (SUB (SLDconst <t> x [int8(log32(-c&^(-c-1)))]) (SLDconst <t> x [int8(log32(-c+(-c&^(-c-1))))]))
        for {
-               c := v.AuxInt
+               t := v.Type
+               c := auxIntToInt32(v.AuxInt)
                x := v_0
-               if !(isPowerOfTwo(c-1) && c >= 17) {
+               if !(isPowerOfTwo32(-c + (-c &^ (-c - 1)))) {
                        break
                }
-               v.reset(OpS390XADD)
-               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, v.Type)
-               v0.AuxInt = log2(c - 1)
+               v.reset(OpS390XSUB)
+               v0 := b.NewValue0(v.Pos, OpS390XSLDconst, t)
+               v0.AuxInt = int8ToAuxInt(int8(log32(-c &^ (-c - 1))))
                v0.AddArg(x)
-               v.AddArg2(v0, x)
+               v1 := b.NewValue0(v.Pos, OpS390XSLDconst, t)
+               v1.AuxInt = int8ToAuxInt(int8(log32(-c + (-c &^ (-c - 1)))))
+               v1.AddArg(x)
+               v.AddArg2(v0, v1)
                return true
        }
        // match: (MULLDconst [c] (MOVDconst [d]))
@@ -14097,81 +14086,64 @@ func rewriteValueS390X_OpS390XMULLW(v *Value) bool {
 func rewriteValueS390X_OpS390XMULLWconst(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (MULLWconst [-1] x)
-       // result: (NEGW x)
-       for {
-               if auxIntToInt32(v.AuxInt) != -1 {
-                       break
-               }
-               x := v_0
-               v.reset(OpS390XNEGW)
-               v.AddArg(x)
-               return true
-       }
-       // match: (MULLWconst [0] _)
-       // result: (MOVDconst [0])
-       for {
-               if auxIntToInt32(v.AuxInt) != 0 {
-                       break
-               }
-               v.reset(OpS390XMOVDconst)
-               v.AuxInt = int64ToAuxInt(0)
-               return true
-       }
-       // match: (MULLWconst [1] x)
-       // result: x
+       // match: (MULLWconst <t> x [c])
+       // cond: isPowerOfTwo32(c&(c-1))
+       // result: (ADDW (SLWconst <t> x [int8(log32(c&(c-1)))]) (SLWconst <t> x [int8(log32(c&^(c-1)))]))
        for {
-               if auxIntToInt32(v.AuxInt) != 1 {
-                       break
-               }
-               x := v_0
-               v.copyOf(x)
-               return true
-       }
-       // match: (MULLWconst [c] x)
-       // cond: isPowerOfTwo(c)
-       // result: (SLWconst [log2(c)] x)
-       for {
-               c := v.AuxInt
+               t := v.Type
+               c := auxIntToInt32(v.AuxInt)
                x := v_0
-               if !(isPowerOfTwo(c)) {
+               if !(isPowerOfTwo32(c & (c - 1))) {
                        break
                }
-               v.reset(OpS390XSLWconst)
-               v.AuxInt = log2(c)
-               v.AddArg(x)
+               v.reset(OpS390XADDW)
+               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, t)
+               v0.AuxInt = int8ToAuxInt(int8(log32(c & (c - 1))))
+               v0.AddArg(x)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, t)
+               v1.AuxInt = int8ToAuxInt(int8(log32(c &^ (c - 1))))
+               v1.AddArg(x)
+               v.AddArg2(v0, v1)
                return true
        }
-       // match: (MULLWconst [c] x)
-       // cond: isPowerOfTwo(c+1) && c >= 15
-       // result: (SUBW (SLWconst <v.Type> [log2(c+1)] x) x)
+       // match: (MULLWconst <t> x [c])
+       // cond: isPowerOfTwo32(c+(c&^(c-1)))
+       // result: (SUBW (SLWconst <t> x [int8(log32(c+(c&^(c-1))))]) (SLWconst <t> x [int8(log32(c&^(c-1)))]))
        for {
-               c := v.AuxInt
+               t := v.Type
+               c := auxIntToInt32(v.AuxInt)
                x := v_0
-               if !(isPowerOfTwo(c+1) && c >= 15) {
+               if !(isPowerOfTwo32(c + (c &^ (c - 1)))) {
                        break
                }
                v.reset(OpS390XSUBW)
-               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
-               v0.AuxInt = log2(c + 1)
+               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, t)
+               v0.AuxInt = int8ToAuxInt(int8(log32(c + (c &^ (c - 1)))))
                v0.AddArg(x)
-               v.AddArg2(v0, x)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, t)
+               v1.AuxInt = int8ToAuxInt(int8(log32(c &^ (c - 1))))
+               v1.AddArg(x)
+               v.AddArg2(v0, v1)
                return true
        }
-       // match: (MULLWconst [c] x)
-       // cond: isPowerOfTwo(c-1) && c >= 17
-       // result: (ADDW (SLWconst <v.Type> [log2(c-1)] x) x)
+       // match: (MULLWconst <t> x [c])
+       // cond: isPowerOfTwo32(-c+(-c&^(-c-1)))
+       // result: (SUBW (SLWconst <t> x [int8(log32(-c&^(-c-1)))]) (SLWconst <t> x [int8(log32(-c+(-c&^(-c-1))))]))
        for {
-               c := v.AuxInt
+               t := v.Type
+               c := auxIntToInt32(v.AuxInt)
                x := v_0
-               if !(isPowerOfTwo(c-1) && c >= 17) {
+               if !(isPowerOfTwo32(-c + (-c &^ (-c - 1)))) {
                        break
                }
-               v.reset(OpS390XADDW)
-               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, v.Type)
-               v0.AuxInt = log2(c - 1)
+               v.reset(OpS390XSUBW)
+               v0 := b.NewValue0(v.Pos, OpS390XSLWconst, t)
+               v0.AuxInt = int8ToAuxInt(int8(log32(-c &^ (-c - 1))))
                v0.AddArg(x)
-               v.AddArg2(v0, x)
+               v1 := b.NewValue0(v.Pos, OpS390XSLWconst, t)
+               v1.AuxInt = int8ToAuxInt(int8(log32(-c + (-c &^ (-c - 1)))))
+               v1.AddArg(x)
+               v.AddArg2(v0, v1)
                return true
        }
        // match: (MULLWconst [c] (MOVDconst [d]))
@@ -16826,6 +16798,20 @@ func rewriteValueS390X_OpS390XSLD(v *Value) bool {
        }
        return false
 }
+func rewriteValueS390X_OpS390XSLDconst(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (SLDconst x [0])
+       // result: x
+       for {
+               if auxIntToInt8(v.AuxInt) != 0 {
+                       break
+               }
+               x := v_0
+               v.copyOf(x)
+               return true
+       }
+       return false
+}
 func rewriteValueS390X_OpS390XSLW(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -16960,6 +16946,20 @@ func rewriteValueS390X_OpS390XSLW(v *Value) bool {
        }
        return false
 }
+func rewriteValueS390X_OpS390XSLWconst(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (SLWconst x [0])
+       // result: x
+       for {
+               if auxIntToInt8(v.AuxInt) != 0 {
+                       break
+               }
+               x := v_0
+               v.copyOf(x)
+               return true
+       }
+       return false
+}
 func rewriteValueS390X_OpS390XSRAD(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -17096,6 +17096,16 @@ func rewriteValueS390X_OpS390XSRAD(v *Value) bool {
 }
 func rewriteValueS390X_OpS390XSRADconst(v *Value) bool {
        v_0 := v.Args[0]
+       // match: (SRADconst x [0])
+       // result: x
+       for {
+               if auxIntToInt8(v.AuxInt) != 0 {
+                       break
+               }
+               x := v_0
+               v.copyOf(x)
+               return true
+       }
        // match: (SRADconst [c] (MOVDconst [d]))
        // result: (MOVDconst [d>>uint64(c)])
        for {
@@ -17246,6 +17256,16 @@ func rewriteValueS390X_OpS390XSRAW(v *Value) bool {
 }
 func rewriteValueS390X_OpS390XSRAWconst(v *Value) bool {
        v_0 := v.Args[0]
+       // match: (SRAWconst x [0])
+       // result: x
+       for {
+               if auxIntToInt8(v.AuxInt) != 0 {
+                       break
+               }
+               x := v_0
+               v.copyOf(x)
+               return true
+       }
        // match: (SRAWconst [c] (MOVDconst [d]))
        // result: (MOVDconst [int64(int32(d))>>uint64(c)])
        for {
@@ -17416,6 +17436,16 @@ func rewriteValueS390X_OpS390XSRDconst(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (SRDconst x [0])
+       // result: x
+       for {
+               if auxIntToInt8(v.AuxInt) != 0 {
+                       break
+               }
+               x := v_0
+               v.copyOf(x)
+               return true
+       }
        return false
 }
 func rewriteValueS390X_OpS390XSRW(v *Value) bool {
@@ -17552,6 +17582,20 @@ func rewriteValueS390X_OpS390XSRW(v *Value) bool {
        }
        return false
 }
+func rewriteValueS390X_OpS390XSRWconst(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (SRWconst x [0])
+       // result: x
+       for {
+               if auxIntToInt8(v.AuxInt) != 0 {
+                       break
+               }
+               x := v_0
+               v.copyOf(x)
+               return true
+       }
+       return false
+}
 func rewriteValueS390X_OpS390XSTM2(v *Value) bool {
        v_3 := v.Args[3]
        v_2 := v.Args[2]
diff --git a/src/cmd/compile/internal/test/mulconst_test.go b/src/cmd/compile/internal/test/mulconst_test.go
new file mode 100644 (file)
index 0000000..314cab3
--- /dev/null
@@ -0,0 +1,242 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package test
+
+import "testing"
+
+// Benchmark multiplication of an integer by various constants.
+//
+// The comment above each sub-benchmark provides an example of how the
+// target multiplication operation might be implemented using shift
+// (multiplication by a power of 2), addition and subtraction
+// operations. It is platform-dependent whether these transformations
+// are actually applied.
+
+var (
+       mulSinkI32 int32
+       mulSinkI64 int64
+       mulSinkU32 uint32
+       mulSinkU64 uint64
+)
+
+func BenchmarkMulconstI32(b *testing.B) {
+       // 3x = 2x + x
+       b.Run("3", func(b *testing.B) {
+               x := int32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 3
+               }
+               mulSinkI32 = x
+       })
+       // 5x = 4x + x
+       b.Run("5", func(b *testing.B) {
+               x := int32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 5
+               }
+               mulSinkI32 = x
+       })
+       // 12x = 8x + 4x
+       b.Run("12", func(b *testing.B) {
+               x := int32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 12
+               }
+               mulSinkI32 = x
+       })
+       // 120x = 128x - 8x
+       b.Run("120", func(b *testing.B) {
+               x := int32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 120
+               }
+               mulSinkI32 = x
+       })
+       // -120x = 8x - 128x
+       b.Run("-120", func(b *testing.B) {
+               x := int32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= -120
+               }
+               mulSinkI32 = x
+       })
+       // 65537x = 65536x + x
+       b.Run("65537", func(b *testing.B) {
+               x := int32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 65537
+               }
+               mulSinkI32 = x
+       })
+       // 65538x = 65536x + 2x
+       b.Run("65538", func(b *testing.B) {
+               x := int32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 65538
+               }
+               mulSinkI32 = x
+       })
+}
+
+func BenchmarkMulconstI64(b *testing.B) {
+       // 3x = 2x + x
+       b.Run("3", func(b *testing.B) {
+               x := int64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 3
+               }
+               mulSinkI64 = x
+       })
+       // 5x = 4x + x
+       b.Run("5", func(b *testing.B) {
+               x := int64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 5
+               }
+               mulSinkI64 = x
+       })
+       // 12x = 8x + 4x
+       b.Run("12", func(b *testing.B) {
+               x := int64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 12
+               }
+               mulSinkI64 = x
+       })
+       // 120x = 128x - 8x
+       b.Run("120", func(b *testing.B) {
+               x := int64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 120
+               }
+               mulSinkI64 = x
+       })
+       // -120x = 8x - 128x
+       b.Run("-120", func(b *testing.B) {
+               x := int64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= -120
+               }
+               mulSinkI64 = x
+       })
+       // 65537x = 65536x + x
+       b.Run("65537", func(b *testing.B) {
+               x := int64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 65537
+               }
+               mulSinkI64 = x
+       })
+       // 65538x = 65536x + 2x
+       b.Run("65538", func(b *testing.B) {
+               x := int64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 65538
+               }
+               mulSinkI64 = x
+       })
+}
+
+func BenchmarkMulconstU32(b *testing.B) {
+       // 3x = 2x + x
+       b.Run("3", func(b *testing.B) {
+               x := uint32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 3
+               }
+               mulSinkU32 = x
+       })
+       // 5x = 4x + x
+       b.Run("5", func(b *testing.B) {
+               x := uint32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 5
+               }
+               mulSinkU32 = x
+       })
+       // 12x = 8x + 4x
+       b.Run("12", func(b *testing.B) {
+               x := uint32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 12
+               }
+               mulSinkU32 = x
+       })
+       // 120x = 128x - 8x
+       b.Run("120", func(b *testing.B) {
+               x := uint32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 120
+               }
+               mulSinkU32 = x
+       })
+       // 65537x = 65536x + x
+       b.Run("65537", func(b *testing.B) {
+               x := uint32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 65537
+               }
+               mulSinkU32 = x
+       })
+       // 65538x = 65536x + 2x
+       b.Run("65538", func(b *testing.B) {
+               x := uint32(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 65538
+               }
+               mulSinkU32 = x
+       })
+}
+
+func BenchmarkMulconstU64(b *testing.B) {
+       // 3x = 2x + x
+       b.Run("3", func(b *testing.B) {
+               x := uint64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 3
+               }
+               mulSinkU64 = x
+       })
+       // 5x = 4x + x
+       b.Run("5", func(b *testing.B) {
+               x := uint64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 5
+               }
+               mulSinkU64 = x
+       })
+       // 12x = 8x + 4x
+       b.Run("12", func(b *testing.B) {
+               x := uint64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 12
+               }
+               mulSinkU64 = x
+       })
+       // 120x = 128x - 8x
+       b.Run("120", func(b *testing.B) {
+               x := uint64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 120
+               }
+               mulSinkU64 = x
+       })
+       // 65537x = 65536x + x
+       b.Run("65537", func(b *testing.B) {
+               x := uint64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 65537
+               }
+               mulSinkU64 = x
+       })
+       // 65538x = 65536x + 2x
+       b.Run("65538", func(b *testing.B) {
+               x := uint64(1)
+               for i := 0; i < b.N; i++ {
+                       x *= 65538
+               }
+               mulSinkU64 = x
+       })
+}
index 8f25974376ba6277e78305c34ef9ca5b5a517130..9f30ec8ce42c2956931b4b4b7d51d913571faf41 100644 (file)
@@ -71,9 +71,15 @@ func Mul_96(n int) int {
        // 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL`
        // arm64:`LSL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
        // arm:`SLL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
+       // s390x:`SLD\t[$]5`,`SLD\t[$]6`,-`MULLD`
        return n * 96
 }
 
+func Mul_n120(n int) int {
+       // s390x:`SLD\t[$]3`,`SLD\t[$]7`,-`MULLD`
+       return n * -120
+}
+
 func MulMemSrc(a []uint32, b []float32) {
        // 386:`IMULL\s4\([A-Z]+\),\s[A-Z]+`
        a[0] *= a[1]