From 871a3a409ac55fdcaf329596023e56be279b1950 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Mon, 26 Sep 2022 15:26:05 -0500 Subject: [PATCH] cmd/compile: rework PPC64 Mul64uhilo lowering rules Remove OpPPC64LoweredMuluhilo as this operation can be done more efficiently with MULHDU and MULLD directly. This has the benefit of not needing to use tuple select operations, and giving the scheduler more freedom to place these operations. The primary reason to avoid using tuples here is to to avoid suboptimal scheduling when carry ops (e.x ADDC/ADDE) are used in the same block as 64->128b multiples. CL 432275 modifies the scheduling priorities which may cause non-flag/non-carry generating tuple ops to interfere with carry opcodes. Thus resulting in excess saving and restoring of the XER register. This allows CL 432275 to adjust the scheduling priorities without having to workaround odd tuple scheduling behavior. Change-Id: Id04ef009ec4b86416e5436f2b44ae1474e73720e Reviewed-on: https://go-review.googlesource.com/c/go/+/434855 Run-TryBot: Paul Murphy Reviewed-by: Lynn Boger TryBot-Result: Gopher Robot Reviewed-by: Keith Randall Reviewed-by: Keith Randall Reviewed-by: Cherry Mui --- src/cmd/compile/internal/ppc64/ssa.go | 18 ------------- src/cmd/compile/internal/ssa/gen/PPC64.rules | 3 ++- src/cmd/compile/internal/ssa/gen/PPC64Ops.go | 2 -- src/cmd/compile/internal/ssa/opGen.go | 16 ------------ src/cmd/compile/internal/ssa/rewritePPC64.go | 27 +++++++++++++++++--- 5 files changed, 26 insertions(+), 40 deletions(-) diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index 41f863e7ce..9007cbe553 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -125,24 +125,6 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { p.To.Reg = y } - case ssa.OpPPC64LoweredMuluhilo: - // MULHDU Rarg1, Rarg0, Reg0 - // MULLD Rarg1, Rarg0, Reg1 - r0 := v.Args[0].Reg() - r1 := v.Args[1].Reg() - p := s.Prog(ppc64.AMULHDU) - p.From.Type = obj.TYPE_REG - p.From.Reg = r1 - p.Reg = r0 - p.To.Type = obj.TYPE_REG - p.To.Reg = v.Reg0() - p1 := s.Prog(ppc64.AMULLD) - p1.From.Type = obj.TYPE_REG - p1.From.Reg = r1 - p1.Reg = r0 - p1.To.Type = obj.TYPE_REG - p1.To.Reg = v.Reg1() - case ssa.OpPPC64LoweredAtomicAnd8, ssa.OpPPC64LoweredAtomicAnd32, ssa.OpPPC64LoweredAtomicOr8, diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index 4eae2fc1af..e32c42d630 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -32,7 +32,8 @@ (Mul64 ...) => (MULLD ...) (Mul(32|16|8) ...) => (MULLW ...) -(Mul64uhilo ...) => (LoweredMuluhilo ...) +(Select0 (Mul64uhilo x y)) => (MULHDU x y) +(Select1 (Mul64uhilo x y)) => (MULLD x y) (Div64 [false] x y) => (DIVD x y) (Div64u ...) => (DIVDU ...) diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go index 5e0ed6f3b5..30a6f6fe07 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go @@ -148,7 +148,6 @@ func init() { gp21xer = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, xer}, clobbers: xer} gp2xer1xer = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, xer}, outputs: []regMask{gp, xer}, clobbers: xer} gp31 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}} - gp22 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}} gp1cr = regInfo{inputs: []regMask{gp | sp | sb}} gp2cr = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}} crgp = regInfo{inputs: nil, outputs: []regMask{gp}} @@ -199,7 +198,6 @@ func init() { {name: "MULHW", argLength: 2, reg: gp21, asm: "MULHW", commutative: true}, // (arg0 * arg1) >> 32, signed {name: "MULHDU", argLength: 2, reg: gp21, asm: "MULHDU", commutative: true}, // (arg0 * arg1) >> 64, unsigned {name: "MULHWU", argLength: 2, reg: gp21, asm: "MULHWU", commutative: true}, // (arg0 * arg1) >> 32, unsigned - {name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true}, // arg0 * arg1, returns (hi, lo) {name: "FMUL", argLength: 2, reg: fp21, asm: "FMUL", commutative: true}, // arg0*arg1 {name: "FMULS", argLength: 2, reg: fp21, asm: "FMULS", commutative: true}, // arg0*arg1 diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 4a3abc2b6a..162955675f 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -2087,7 +2087,6 @@ const ( OpPPC64MULHW OpPPC64MULHDU OpPPC64MULHWU - OpPPC64LoweredMuluhilo OpPPC64FMUL OpPPC64FMULS OpPPC64FMADD @@ -27944,21 +27943,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "LoweredMuluhilo", - argLen: 2, - resultNotInArgs: true, - reg: regInfo{ - inputs: []inputInfo{ - {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 - {1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 - }, - outputs: []outputInfo{ - {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 - {1, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 - }, - }, - }, { name: "FMUL", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 0df82d4285..8d0caf833b 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -368,9 +368,6 @@ func rewriteValuePPC64(v *Value) bool { case OpMul64F: v.Op = OpPPC64FMUL return true - case OpMul64uhilo: - v.Op = OpPPC64LoweredMuluhilo - return true case OpMul8: v.Op = OpPPC64MULLW return true @@ -16225,6 +16222,18 @@ func rewriteValuePPC64_OpSelect0(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Select0 (Mul64uhilo x y)) + // result: (MULHDU x y) + for { + if v_0.Op != OpMul64uhilo { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + v.reset(OpPPC64MULHDU) + v.AddArg2(x, y) + return true + } // match: (Select0 (Add64carry x y c)) // result: (Select0 (ADDE x y (Select1 (ADDCconst c [-1])))) for { @@ -16573,6 +16582,18 @@ func rewriteValuePPC64_OpSelect1(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types + // match: (Select1 (Mul64uhilo x y)) + // result: (MULLD x y) + for { + if v_0.Op != OpMul64uhilo { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + v.reset(OpPPC64MULLD) + v.AddArg2(x, y) + return true + } // match: (Select1 (Add64carry x y c)) // result: (ADDZEzero (Select1 (ADDE x y (Select1 (ADDCconst c [-1]))))) for { -- 2.48.1