]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: intrinsify math.MulUintptr on PPC64
authorPaul E. Murphy <murp@ibm.com>
Wed, 27 Mar 2024 21:03:11 +0000 (16:03 -0500)
committerPaul Murphy <murp@ibm.com>
Mon, 26 Aug 2024 17:02:43 +0000 (17:02 +0000)
This can be done efficiently with few instructions.

This also adds MULHDUCC for further codegen improvement.

Change-Id: I06320ba4383a679341b911a237a360ef07b19168
Reviewed-on: https://go-review.googlesource.com/c/go/+/605975
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Archana Ravindar <aravinda@redhat.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/cmd/compile/internal/ppc64/ssa.go
src/cmd/compile/internal/ssa/_gen/PPC64.rules
src/cmd/compile/internal/ssa/_gen/PPC64Ops.go
src/cmd/compile/internal/ssa/_gen/PPC64latelower.rules
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewrite.go
src/cmd/compile/internal/ssa/rewritePPC64.go
src/cmd/compile/internal/ssa/rewritePPC64latelower.go
src/cmd/compile/internal/ssagen/intrinsics.go
src/cmd/compile/internal/ssagen/intrinsics_test.go
test/codegen/bool.go

index 367fd2f6b05da10333a0b333bfb3a05368db590e..0c5137f97db4287fc28e12ca9fde2f954f032771 100644 (file)
@@ -594,7 +594,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                p.To.Reg = r
 
        case ssa.OpPPC64ADDCC, ssa.OpPPC64ANDCC, ssa.OpPPC64SUBCC, ssa.OpPPC64ORCC, ssa.OpPPC64XORCC, ssa.OpPPC64NORCC,
-               ssa.OpPPC64ANDNCC:
+               ssa.OpPPC64ANDNCC, ssa.OpPPC64MULHDUCC:
                r1 := v.Args[0].Reg()
                r2 := v.Args[1].Reg()
                p := s.Prog(v.Op.Asm())
index d89cc5971471e209b0eaea0c6a064413cfae2e61..323ec520fa73621930509ce1795be523116c38c0 100644 (file)
@@ -40,6 +40,8 @@
 (Mul(32|16|8) ...) => (MULLW ...)
 (Select0 (Mul64uhilo x y)) => (MULHDU x y)
 (Select1 (Mul64uhilo x y)) => (MULLD x y)
+(Select0 (Mul64uover x y)) => (MULLD x y)
+(Select1 (Mul64uover x y)) => (SETBCR [2] (CMPconst [0] (MULHDU <x.Type> x y)))
 
 (Div64 [false] x y) => (DIVD x y)
 (Div64u ...) => (DIVDU ...)
index 799881a8cdd1805ea7fcac2278a5db563e8b983c..eab185e05cd8912ed9f546185bfdb2e99c2f4c46 100644 (file)
@@ -199,10 +199,11 @@ func init() {
                {name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
                {name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"},                  // (arg0*arg1)+arg2 (signed 64-bit)
 
-               {name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true},   // (arg0 * arg1) >> 64, signed
-               {name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true},   // (arg0 * arg1) >> 32, signed
-               {name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned
-               {name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned
+               {name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true},                             // (arg0 * arg1) >> 64, signed
+               {name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true},                             // (arg0 * arg1) >> 32, signed
+               {name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true},                           // (arg0 * arg1) >> 64, unsigned
+               {name: "MULHDUCC", argLength: 2, reg: gp21, asm: "MULHDUCC", commutative: true, typ: "(Int64,Flags)"}, // (arg0 * arg1) >> 64, unsigned, sets CC
+               {name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true},                           // (arg0 * arg1) >> 32, unsigned
 
                {name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true},   // arg0*arg1
                {name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1
index 7aa8f41e7887583ff7a9baf05dfa2ae66a1dd119..15e6f7251977c796f7bb547b7ff238feca0ef9f9 100644 (file)
 //   2. Rewrite (CMPconst [0] (Select0 (OpCC ...))) into (Select1 (OpCC...))
 // Note: to minimize potentially expensive regeneration of CC opcodes during the flagalloc pass, only rewrite if
 //       both ops are in the same block.
-(CMPconst [0] z:((ADD|AND|ANDN|OR|SUB|NOR|XOR) x y)) && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
+(CMPconst [0] z:((ADD|AND|ANDN|OR|SUB|NOR|XOR|MULHDU) x y)) && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
 (CMPconst [0] z:((NEG|CNTLZD|RLDICL) x)) && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
 // Note: ADDCCconst only assembles to 1 instruction for int16 constants.
 (CMPconst [0] z:(ADDconst [c] x)) && int64(int16(c)) == c && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
 (CMPconst [0] z:(ANDconst [c] x)) && int64(uint16(c)) == c && v.Block == z.Block => (CMPconst [0] convertPPC64OpToOpCC(z))
 // And finally, fixup the flag user.
-(CMPconst <t> [0] (Select0 z:((ADD|AND|ANDN|OR|SUB|NOR|XOR)CC x y))) => (Select1 <t> z)
+(CMPconst <t> [0] (Select0 z:((ADD|AND|ANDN|OR|SUB|NOR|XOR|MULHDU)CC x y))) => (Select1 <t> z)
 (CMPconst <t> [0] (Select0 z:((ADDCCconst|ANDCCconst|NEGCC|CNTLZDCC|RLDICLCC) y))) => (Select1 <t> z)
 
 // After trying to convert ANDconst to ANDCCconst above, if the CC result is not needed, try to avoid using
index a1dafe37cf219a2ab84dd5d0357a3bc3eab67c25..cfea0342c8323ea6319b4924c21289db35dfaff1 100644 (file)
@@ -2142,6 +2142,7 @@ const (
        OpPPC64MULHD
        OpPPC64MULHW
        OpPPC64MULHDU
+       OpPPC64MULHDUCC
        OpPPC64MULHWU
        OpPPC64FMUL
        OpPPC64FMULS
@@ -28869,6 +28870,21 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:        "MULHDUCC",
+               argLen:      2,
+               commutative: true,
+               asm:         ppc64.AMULHDUCC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                               {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+                       outputs: []outputInfo{
+                               {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+               },
+       },
        {
                name:        "MULHWU",
                argLen:      2,
index fd7deadcdc95405bd8bf9fb5145f3facab431e30..1f81217fc8ec35ff507dd835c26b78de08f85fdb 100644 (file)
@@ -1786,14 +1786,15 @@ func convertPPC64OpToOpCC(op *Value) *Value {
                OpPPC64ADD:      OpPPC64ADDCC,
                OpPPC64ADDconst: OpPPC64ADDCCconst,
                OpPPC64AND:      OpPPC64ANDCC,
-               OpPPC64ANDconst: OpPPC64ANDCCconst,
                OpPPC64ANDN:     OpPPC64ANDNCC,
+               OpPPC64ANDconst: OpPPC64ANDCCconst,
                OpPPC64CNTLZD:   OpPPC64CNTLZDCC,
+               OpPPC64MULHDU:   OpPPC64MULHDUCC,
+               OpPPC64NEG:      OpPPC64NEGCC,
+               OpPPC64NOR:      OpPPC64NORCC,
                OpPPC64OR:       OpPPC64ORCC,
                OpPPC64RLDICL:   OpPPC64RLDICLCC,
                OpPPC64SUB:      OpPPC64SUBCC,
-               OpPPC64NEG:      OpPPC64NEGCC,
-               OpPPC64NOR:      OpPPC64NORCC,
                OpPPC64XOR:      OpPPC64XORCC,
        }
        b := op.Block
index b45770995e5b2d587c1626143fdbf5ed3fa142c8..0811566114a733544e0766505e8f794c892e3d06 100644 (file)
@@ -14498,6 +14498,18 @@ func rewriteValuePPC64_OpSelect0(v *Value) bool {
                v.AddArg2(x, y)
                return true
        }
+       // match: (Select0 (Mul64uover x y))
+       // result: (MULLD x y)
+       for {
+               if v_0.Op != OpMul64uover {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               v.reset(OpPPC64MULLD)
+               v.AddArg2(x, y)
+               return true
+       }
        // match: (Select0 (Add64carry x y c))
        // result: (Select0 <typ.UInt64> (ADDE x y (Select1 <typ.UInt64> (ADDCconst c [-1]))))
        for {
@@ -14558,6 +14570,24 @@ func rewriteValuePPC64_OpSelect1(v *Value) bool {
                v.AddArg2(x, y)
                return true
        }
+       // match: (Select1 (Mul64uover x y))
+       // result: (SETBCR [2] (CMPconst [0] (MULHDU <x.Type> x y)))
+       for {
+               if v_0.Op != OpMul64uover {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               v.reset(OpPPC64SETBCR)
+               v.AuxInt = int32ToAuxInt(2)
+               v0 := b.NewValue0(v.Pos, OpPPC64CMPconst, types.TypeFlags)
+               v0.AuxInt = int64ToAuxInt(0)
+               v1 := b.NewValue0(v.Pos, OpPPC64MULHDU, x.Type)
+               v1.AddArg2(x, y)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
        // match: (Select1 (Add64carry x y c))
        // result: (ADDZEzero (Select1 <typ.UInt64> (ADDE x y (Select1 <typ.UInt64> (ADDCconst c [-1])))))
        for {
index 23d8601fb4733ee24a592bb2191488b0c2a201a4..18c05280c04bf01af49d255a08d46cc048f2bc0f 100644 (file)
@@ -296,6 +296,25 @@ func rewriteValuePPC64latelower_OpPPC64CMPconst(v *Value) bool {
                v.AddArg(convertPPC64OpToOpCC(z))
                return true
        }
+       // match: (CMPconst [0] z:(MULHDU x y))
+       // cond: v.Block == z.Block
+       // result: (CMPconst [0] convertPPC64OpToOpCC(z))
+       for {
+               if auxIntToInt64(v.AuxInt) != 0 {
+                       break
+               }
+               z := v_0
+               if z.Op != OpPPC64MULHDU {
+                       break
+               }
+               if !(v.Block == z.Block) {
+                       break
+               }
+               v.reset(OpPPC64CMPconst)
+               v.AuxInt = int64ToAuxInt(0)
+               v.AddArg(convertPPC64OpToOpCC(z))
+               return true
+       }
        // match: (CMPconst [0] z:(NEG x))
        // cond: v.Block == z.Block
        // result: (CMPconst [0] convertPPC64OpToOpCC(z))
@@ -505,6 +524,22 @@ func rewriteValuePPC64latelower_OpPPC64CMPconst(v *Value) bool {
                v.AddArg(z)
                return true
        }
+       // match: (CMPconst <t> [0] (Select0 z:(MULHDUCC x y)))
+       // result: (Select1 <t> z)
+       for {
+               t := v.Type
+               if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpSelect0 {
+                       break
+               }
+               z := v_0.Args[0]
+               if z.Op != OpPPC64MULHDUCC {
+                       break
+               }
+               v.reset(OpSelect1)
+               v.Type = t
+               v.AddArg(z)
+               return true
+       }
        // match: (CMPconst <t> [0] (Select0 z:(ADDCCconst y)))
        // result: (Select1 <t> z)
        for {
index f44531b88cc88af0f17fd8f1b5f82bfc6fdf690f..d0d35c3f5f534a85191732e0546d058e64575246 100644 (file)
@@ -91,7 +91,7 @@ func initIntrinsics() {
                        }
                        return s.newValue2(ssa.OpMul64uover, types.NewTuple(types.Types[types.TUINT], types.Types[types.TUINT]), args[0], args[1])
                },
-               sys.AMD64, sys.I386, sys.Loong64, sys.MIPS64, sys.RISCV64, sys.ARM64)
+               sys.AMD64, sys.I386, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.ARM64)
        add("runtime", "KeepAlive",
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        data := s.newValue1(ssa.OpIData, s.f.Config.Types.BytePtr, args[0])
index 768b0c42457a23275f401bea7fdea3d6677ed403..a778e95a12eb391c336fb14a57644373108fe6ca 100644 (file)
@@ -782,6 +782,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"ppc64", "internal/runtime/atomic", "Xchguintptr"}:        struct{}{},
        {"ppc64", "internal/runtime/math", "Add64"}:                struct{}{},
        {"ppc64", "internal/runtime/math", "Mul64"}:                struct{}{},
+       {"ppc64", "internal/runtime/math", "MulUintptr"}:           struct{}{},
        {"ppc64", "internal/runtime/sys", "Len64"}:                 struct{}{},
        {"ppc64", "internal/runtime/sys", "Len8"}:                  struct{}{},
        {"ppc64", "internal/runtime/sys", "OnesCount64"}:           struct{}{},
@@ -896,6 +897,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"ppc64le", "internal/runtime/atomic", "Xchguintptr"}:      struct{}{},
        {"ppc64le", "internal/runtime/math", "Add64"}:              struct{}{},
        {"ppc64le", "internal/runtime/math", "Mul64"}:              struct{}{},
+       {"ppc64le", "internal/runtime/math", "MulUintptr"}:         struct{}{},
        {"ppc64le", "internal/runtime/sys", "Len64"}:               struct{}{},
        {"ppc64le", "internal/runtime/sys", "Len8"}:                struct{}{},
        {"ppc64le", "internal/runtime/sys", "OnesCount64"}:         struct{}{},
index 0daeb88b9bf45762f45bf48b7241fb1992e6ea63..164ca1b2246aa37522a6b23d50e67a8903179128 100644 (file)
@@ -273,6 +273,12 @@ func TestLogicalCompareZero(x *[64]uint64) {
                x[12] = uint64(c)
        }
 
+       // ppc64x:"MULHDUCC",^"MULHDU"
+       hi, _ := bits.Mul64(x[13], x[14])
+       if hi != 0 {
+               x[14] = hi
+       }
+
 }
 
 func constantWrite(b bool, p *bool) {