From 7615b20d06500fe5c95c061f6ff32e0c97639a60 Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Thu, 20 Aug 2020 15:06:23 -0500 Subject: [PATCH] cmd/compile: generate subfic on ppc64 This merges an lis + subf into subfic, and for 32b constants lwa + subf into oris + ori + subf. The carry bit is no longer used in code generation, therefore I think we can clobber it as needed. Note, lowered borrow/carry arithmetic is self-contained and thus is not affected. A few extra rules are added to ensure early transformations to SUBFCconst don't trip up earlier rules, fold constant operations, or otherwise simplify lowering. Likewise, tests are added to ensure all rules are hit. Generic constant folding catches trivial cases, however some lowering rules insert arithmetic which can introduce new opportunities (e.g BitLen or Slicemask). I couldn't find a specific benchmark to demonstrate noteworthy improvements, but this is generating subfic in many of the default bent test binaries, so we are at least saving a little code space. Change-Id: Iad7c6e5767eaa9dc24dc1c989bd1c8cfe1982012 Reviewed-on: https://go-review.googlesource.com/c/go/+/249461 Run-TryBot: Lynn Boger TryBot-Result: Gobot Gobot Reviewed-by: Carlos Eduardo Seo --- src/cmd/compile/internal/ppc64/ssa.go | 8 + src/cmd/compile/internal/ssa/gen/PPC64.rules | 31 +- src/cmd/compile/internal/ssa/gen/PPC64Ops.go | 1 + src/cmd/compile/internal/ssa/opGen.go | 15 + src/cmd/compile/internal/ssa/rewritePPC64.go | 571 ++++++++++++++++++- test/codegen/arithmetic.go | 42 ++ test/codegen/mathbits.go | 14 + test/codegen/slices.go | 21 + 8 files changed, 686 insertions(+), 17 deletions(-) diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go index 9c4c01e935..f8d9ac2379 100644 --- a/src/cmd/compile/internal/ppc64/ssa.go +++ b/src/cmd/compile/internal/ppc64/ssa.go @@ -649,6 +649,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() + case ssa.OpPPC64SUBFCconst: + p := s.Prog(v.Op.Asm()) + p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt}) + p.From.Type = obj.TYPE_REG + p.From.Reg = v.Args[0].Reg() + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + case ssa.OpPPC64ANDCCconst: p := s.Prog(v.Op.Asm()) p.Reg = v.Args[0].Reg() diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules index 509cfe1c4f..e5fb1e98c2 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64.rules +++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules @@ -110,13 +110,21 @@ // Rotate generation with non-const shift // these match patterns from math/bits/RotateLeft[32|64], but there could be others (ADD (SLD x (ANDconst [63] y)) (SRD x (SUB (MOVDconst [64]) (ANDconst [63] y)))) => (ROTL x y) +(ADD (SLD x (ANDconst [63] y)) (SRD x (SUBFCconst [64] (ANDconst [63] y)))) => (ROTL x y) ( OR (SLD x (ANDconst [63] y)) (SRD x (SUB (MOVDconst [64]) (ANDconst [63] y)))) => (ROTL x y) +( OR (SLD x (ANDconst [63] y)) (SRD x (SUBFCconst [64] (ANDconst [63] y)))) => (ROTL x y) (XOR (SLD x (ANDconst [63] y)) (SRD x (SUB (MOVDconst [64]) (ANDconst [63] y)))) => (ROTL x y) +(XOR (SLD x (ANDconst [63] y)) (SRD x (SUBFCconst [64] (ANDconst [63] y)))) => (ROTL x y) + +(ADD (SLW x (ANDconst [31] y)) (SRW x (SUBFCconst [32] (ANDconst [31] y)))) => (ROTLW x y) (ADD (SLW x (ANDconst [31] y)) (SRW x (SUB (MOVDconst [32]) (ANDconst [31] y)))) => (ROTLW x y) +( OR (SLW x (ANDconst [31] y)) (SRW x (SUBFCconst [32] (ANDconst [31] y)))) => (ROTLW x y) ( OR (SLW x (ANDconst [31] y)) (SRW 
x (SUB (MOVDconst [32]) (ANDconst [31] y)))) => (ROTLW x y) +(XOR (SLW x (ANDconst [31] y)) (SRW x (SUBFCconst [32] (ANDconst [31] y)))) => (ROTLW x y) (XOR (SLW x (ANDconst [31] y)) (SRW x (SUB (MOVDconst [32]) (ANDconst [31] y)))) => (ROTLW x y) + // Lowering rotates (RotateLeft32 x y) => (ROTLW x y) (RotateLeft64 x y) => (ROTL x y) @@ -192,11 +200,15 @@ (Rsh64Ux64 x (AND y (MOVDconst [63]))) => (SRD x (ANDconst [63] y)) (Rsh64Ux64 x (ANDconst [63] y)) => (SRD x (ANDconst [63] y)) (Rsh64Ux64 x (SUB (MOVDconst [64]) (ANDconst [63] y))) => (SRD x (SUB (MOVDconst [64]) (ANDconst [63] y))) +(Rsh64Ux64 x (SUBFCconst [64] (ANDconst [63] y))) => (SRD x (SUBFCconst [64] (ANDconst [63] y))) (Rsh64Ux64 x (SUB (MOVDconst [64]) (AND y (MOVDconst [63])))) => (SRD x (SUB (MOVDconst [64]) (ANDconst [63] y))) +(Rsh64Ux64 x (SUBFCconst [64] (AND y (MOVDconst [63])))) => (SRD x (SUBFCconst [64] (ANDconst [63] y))) (Rsh64x64 x (AND y (MOVDconst [63]))) => (SRAD x (ANDconst [63] y)) (Rsh64x64 x (ANDconst [63] y)) => (SRAD x (ANDconst [63] y)) (Rsh64x64 x (SUB (MOVDconst [64]) (ANDconst [63] y))) => (SRAD x (SUB (MOVDconst [64]) (ANDconst [63] y))) +(Rsh64x64 x (SUBFCconst [64] (ANDconst [63] y))) => (SRAD x (SUBFCconst [64] (ANDconst [63] y))) (Rsh64x64 x (SUB (MOVDconst [64]) (AND y (MOVDconst [63])))) => (SRAD x (SUB (MOVDconst [64]) (ANDconst [63] y))) +(Rsh64x64 x (SUBFCconst [64] (AND y (MOVDconst [63])))) => (SRAD x (SUBFCconst [64] (ANDconst [63] y))) (Lsh64x64 x y) => (SLD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64])))) (Rsh64x64 x y) => (SRAD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64])))) @@ -208,12 +220,16 @@ (Rsh32Ux64 x (AND y (MOVDconst [31]))) => (SRW x (ANDconst [31] y)) (Rsh32Ux64 x (ANDconst [31] y)) => (SRW x (ANDconst [31] y)) (Rsh32Ux64 x (SUB (MOVDconst [32]) (ANDconst [31] y))) => (SRW x (SUB (MOVDconst [32]) (ANDconst [31] y))) +(Rsh32Ux64 x (SUBFCconst [32] (ANDconst [31] y))) => (SRW x (SUBFCconst [32] (ANDconst [31] y))) (Rsh32Ux64 x (SUB (MOVDconst [32]) (AND y (MOVDconst [31])))) => (SRW x (SUB (MOVDconst [32]) (ANDconst [31] y))) +(Rsh32Ux64 x (SUBFCconst [32] (AND y (MOVDconst [31])))) => (SRW x (SUBFCconst [32] (ANDconst [31] y))) (Rsh32x64 x (AND y (MOVDconst [31]))) => (SRAW x (ANDconst [31] y)) (Rsh32x64 x (ANDconst [31] y)) => (SRAW x (ANDconst [31] y)) (Rsh32x64 x (SUB (MOVDconst [32]) (ANDconst [31] y))) => (SRAW x (SUB (MOVDconst [32]) (ANDconst [31] y))) +(Rsh32x64 x (SUBFCconst [32] (ANDconst [31] y))) => (SRAW x (SUBFCconst [32] (ANDconst [31] y))) (Rsh32x64 x (SUB (MOVDconst [32]) (AND y (MOVDconst [31])))) => (SRAW x (SUB (MOVDconst [32]) (ANDconst [31] y))) +(Rsh32x64 x (SUBFCconst [32] (AND y (MOVDconst [31])))) => (SRAW x (SUBFCconst [32] (ANDconst [31] y))) (Rsh32x64 x y) => (SRAW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32])))) (Rsh32Ux64 x y) => (SRW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32])))) @@ -299,8 +315,8 @@ (Ctz16 x) => (POPCNTW (MOVHZreg (ANDN (ADDconst [-1] x) x))) (Ctz8 x) => (POPCNTB (MOVBZreg (ANDN (ADDconst [-1] x) x))) -(BitLen64 x) => (SUB (MOVDconst [64]) (CNTLZD x)) -(BitLen32 x) => (SUB (MOVDconst [32]) (CNTLZW x)) +(BitLen64 x) => (SUBFCconst [64] (CNTLZD x)) +(BitLen32 x) => (SUBFCconst [32] (CNTLZW x)) (PopCount64 ...) => (POPCNTD ...) 
(PopCount32 x) => (POPCNTW (MOVWZreg x)) @@ -770,10 +786,19 @@ (ADDconst [c] (ADDconst [d] x)) && is32Bit(c+d) => (ADDconst [c+d] x) (ADDconst [0] x) => x (SUB x (MOVDconst [c])) && is32Bit(-c) => (ADDconst [-c] x) -// TODO deal with subtract-from-const (ADDconst [c] (MOVDaddr [d] {sym} x)) && is32Bit(c+int64(d)) => (MOVDaddr [int32(c+int64(d))] {sym} x) +// Subtract from (with carry, but ignored) constant. +// Note, these clobber the carry bit. +(SUB (MOVDconst [c]) x) && is32Bit(c) => (SUBFCconst [c] x) +(SUBFCconst [c] (NEG x)) => (ADDconst [c] x) +(SUBFCconst [c] (SUBFCconst [d] x)) && is32Bit(c-d) => (ADDconst [c-d] x) +(SUBFCconst [0] x) => (NEG x) +(ADDconst [c] (SUBFCconst [d] x)) && is32Bit(c+d) => (SUBFCconst [c+d] x) +(NEG (ADDconst [c] x)) && is32Bit(-c) => (SUBFCconst [-c] x) +(NEG (SUBFCconst [c] x)) && is32Bit(-c) => (ADDconst [-c] x) + // Use register moves instead of stores and loads to move int<=>float values // Common with math Float64bits, Float64frombits (MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr x _)) => (MFVSRD x) diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go index f91222446c..44f6a74c63 100644 --- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go @@ -175,6 +175,7 @@ func init() { {name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true}, // arg0+arg1 {name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0+arg1 {name: "SUB", argLength: 2, reg: gp21, asm: "SUB"}, // arg0-arg1 + {name: "SUBFCconst", argLength: 1, reg: gp11, asm: "SUBC", aux: "Int64"}, // auxInt - arg0 (with carry) {name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"}, // arg0-arg1 {name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"}, // arg0-arg1 diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index e181174d11..45401898c8 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1828,6 +1828,7 @@ const ( OpPPC64FADD OpPPC64FADDS OpPPC64SUB + OpPPC64SUBFCconst OpPPC64FSUB OpPPC64FSUBS OpPPC64MULLD @@ -24313,6 +24314,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "SUBFCconst", + auxType: auxInt64, + argLen: 1, + asm: ppc64.ASUBC, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + outputs: []outputInfo{ + {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 + }, + }, + }, { name: "FSUB", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index 1a0b03e81c..152cdfdf4d 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -568,6 +568,8 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpPPC64MOVWstorezero(v) case OpPPC64MTVSRD: return rewriteValuePPC64_OpPPC64MTVSRD(v) + case OpPPC64NEG: + return rewriteValuePPC64_OpPPC64NEG(v) case OpPPC64NOR: return rewriteValuePPC64_OpPPC64NOR(v) case OpPPC64NotEqual: @@ -596,6 +598,8 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpPPC64SRW(v) case OpPPC64SUB: return rewriteValuePPC64_OpPPC64SUB(v) + case OpPPC64SUBFCconst: + return rewriteValuePPC64_OpPPC64SUBFCconst(v) case OpPPC64XOR: return rewriteValuePPC64_OpPPC64XOR(v) case OpPPC64XORconst: @@ -1021,15 +1025,14 @@ func 
rewriteValuePPC64_OpBitLen32(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (BitLen32 x) - // result: (SUB (MOVDconst [32]) (CNTLZW x)) + // result: (SUBFCconst [32] (CNTLZW x)) for { x := v_0 - v.reset(OpPPC64SUB) - v0 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v0.AuxInt = int64ToAuxInt(32) - v1 := b.NewValue0(v.Pos, OpPPC64CNTLZW, typ.Int) - v1.AddArg(x) - v.AddArg2(v0, v1) + v.reset(OpPPC64SUBFCconst) + v.AuxInt = int64ToAuxInt(32) + v0 := b.NewValue0(v.Pos, OpPPC64CNTLZW, typ.Int) + v0.AddArg(x) + v.AddArg(v0) return true } } @@ -1038,15 +1041,14 @@ func rewriteValuePPC64_OpBitLen64(v *Value) bool { b := v.Block typ := &b.Func.Config.Types // match: (BitLen64 x) - // result: (SUB (MOVDconst [64]) (CNTLZD x)) + // result: (SUBFCconst [64] (CNTLZD x)) for { x := v_0 - v.reset(OpPPC64SUB) - v0 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v0.AuxInt = int64ToAuxInt(64) - v1 := b.NewValue0(v.Pos, OpPPC64CNTLZD, typ.Int) - v1.AddArg(x) - v.AddArg2(v0, v1) + v.reset(OpPPC64SUBFCconst) + v.AuxInt = int64ToAuxInt(64) + v0 := b.NewValue0(v.Pos, OpPPC64CNTLZD, typ.Int) + v0.AddArg(x) + v.AddArg(v0) return true } } @@ -3957,6 +3959,76 @@ func rewriteValuePPC64_OpPPC64ADD(v *Value) bool { } break } + // match: (ADD (SLD x (ANDconst [63] y)) (SRD x (SUBFCconst [64] (ANDconst [63] y)))) + // result: (ROTL x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpPPC64SLD { + continue + } + _ = v_0.Args[1] + x := v_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpPPC64ANDconst || v_0_1.Type != typ.Int64 || auxIntToInt64(v_0_1.AuxInt) != 63 { + continue + } + y := v_0_1.Args[0] + if v_1.Op != OpPPC64SRD { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] { + continue + } + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpPPC64SUBFCconst || v_1_1.Type != typ.UInt || auxIntToInt64(v_1_1.AuxInt) != 64 { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpPPC64ANDconst || v_1_1_0.Type != typ.UInt || auxIntToInt64(v_1_1_0.AuxInt) != 63 || y != v_1_1_0.Args[0] { + continue + } + v.reset(OpPPC64ROTL) + v.AddArg2(x, y) + return true + } + break + } + // match: (ADD (SLW x (ANDconst [31] y)) (SRW x (SUBFCconst [32] (ANDconst [31] y)))) + // result: (ROTLW x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpPPC64SLW { + continue + } + _ = v_0.Args[1] + x := v_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpPPC64ANDconst || v_0_1.Type != typ.Int32 || auxIntToInt64(v_0_1.AuxInt) != 31 { + continue + } + y := v_0_1.Args[0] + if v_1.Op != OpPPC64SRW { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] { + continue + } + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpPPC64SUBFCconst || v_1_1.Type != typ.UInt || auxIntToInt64(v_1_1.AuxInt) != 32 { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpPPC64ANDconst || v_1_1_0.Type != typ.UInt || auxIntToInt64(v_1_1_0.AuxInt) != 31 || y != v_1_1_0.Args[0] { + continue + } + v.reset(OpPPC64ROTLW) + v.AddArg2(x, y) + return true + } + break + } // match: (ADD (SLW x (ANDconst [31] y)) (SRW x (SUB (MOVDconst [32]) (ANDconst [31] y)))) // result: (ROTLW x y) for { @@ -4069,6 +4141,24 @@ func rewriteValuePPC64_OpPPC64ADDconst(v *Value) bool { v.AddArg(x) return true } + // match: (ADDconst [c] (SUBFCconst [d] x)) + // cond: is32Bit(c+d) + // result: (SUBFCconst [c+d] x) + for { + c := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64SUBFCconst { + break + } + d := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(is32Bit(c + d)) { + break + } + 
v.reset(OpPPC64SUBFCconst) + v.AuxInt = int64ToAuxInt(c + d) + v.AddArg(x) + return true + } return false } func rewriteValuePPC64_OpPPC64AND(v *Value) bool { @@ -10336,6 +10426,44 @@ func rewriteValuePPC64_OpPPC64MTVSRD(v *Value) bool { } return false } +func rewriteValuePPC64_OpPPC64NEG(v *Value) bool { + v_0 := v.Args[0] + // match: (NEG (ADDconst [c] x)) + // cond: is32Bit(-c) + // result: (SUBFCconst [-c] x) + for { + if v_0.Op != OpPPC64ADDconst { + break + } + c := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(is32Bit(-c)) { + break + } + v.reset(OpPPC64SUBFCconst) + v.AuxInt = int64ToAuxInt(-c) + v.AddArg(x) + return true + } + // match: (NEG (SUBFCconst [c] x)) + // cond: is32Bit(-c) + // result: (ADDconst [-c] x) + for { + if v_0.Op != OpPPC64SUBFCconst { + break + } + c := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(is32Bit(-c)) { + break + } + v.reset(OpPPC64ADDconst) + v.AuxInt = int64ToAuxInt(-c) + v.AddArg(x) + return true + } + return false +} func rewriteValuePPC64_OpPPC64NOR(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -10510,6 +10638,76 @@ func rewriteValuePPC64_OpPPC64OR(v *Value) bool { } break } + // match: ( OR (SLD x (ANDconst [63] y)) (SRD x (SUBFCconst [64] (ANDconst [63] y)))) + // result: (ROTL x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpPPC64SLD { + continue + } + _ = v_0.Args[1] + x := v_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpPPC64ANDconst || v_0_1.Type != typ.Int64 || auxIntToInt64(v_0_1.AuxInt) != 63 { + continue + } + y := v_0_1.Args[0] + if v_1.Op != OpPPC64SRD { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] { + continue + } + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpPPC64SUBFCconst || v_1_1.Type != typ.UInt || auxIntToInt64(v_1_1.AuxInt) != 64 { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpPPC64ANDconst || v_1_1_0.Type != typ.UInt || auxIntToInt64(v_1_1_0.AuxInt) != 63 || y != v_1_1_0.Args[0] { + continue + } + v.reset(OpPPC64ROTL) + v.AddArg2(x, y) + return true + } + break + } + // match: ( OR (SLW x (ANDconst [31] y)) (SRW x (SUBFCconst [32] (ANDconst [31] y)))) + // result: (ROTLW x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpPPC64SLW { + continue + } + _ = v_0.Args[1] + x := v_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpPPC64ANDconst || v_0_1.Type != typ.Int32 || auxIntToInt64(v_0_1.AuxInt) != 31 { + continue + } + y := v_0_1.Args[0] + if v_1.Op != OpPPC64SRW { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] { + continue + } + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpPPC64SUBFCconst || v_1_1.Type != typ.UInt || auxIntToInt64(v_1_1.AuxInt) != 32 { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpPPC64ANDconst || v_1_1_0.Type != typ.UInt || auxIntToInt64(v_1_1_0.AuxInt) != 31 || y != v_1_1_0.Args[0] { + continue + } + v.reset(OpPPC64ROTLW) + v.AddArg2(x, y) + return true + } + break + } // match: ( OR (SLW x (ANDconst [31] y)) (SRW x (SUB (MOVDconst [32]) (ANDconst [31] y)))) // result: (ROTLW x y) for { @@ -12109,6 +12307,69 @@ func rewriteValuePPC64_OpPPC64SUB(v *Value) bool { v.AddArg(x) return true } + // match: (SUB (MOVDconst [c]) x) + // cond: is32Bit(c) + // result: (SUBFCconst [c] x) + for { + if v_0.Op != OpPPC64MOVDconst { + break + } + c := auxIntToInt64(v_0.AuxInt) + x := v_1 + if !(is32Bit(c)) { + break + } + v.reset(OpPPC64SUBFCconst) + v.AuxInt = int64ToAuxInt(c) + v.AddArg(x) + return true + } + return false +} +func rewriteValuePPC64_OpPPC64SUBFCconst(v 
*Value) bool { + v_0 := v.Args[0] + // match: (SUBFCconst [c] (NEG x)) + // result: (ADDconst [c] x) + for { + c := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64NEG { + break + } + x := v_0.Args[0] + v.reset(OpPPC64ADDconst) + v.AuxInt = int64ToAuxInt(c) + v.AddArg(x) + return true + } + // match: (SUBFCconst [c] (SUBFCconst [d] x)) + // cond: is32Bit(c-d) + // result: (ADDconst [c-d] x) + for { + c := auxIntToInt64(v.AuxInt) + if v_0.Op != OpPPC64SUBFCconst { + break + } + d := auxIntToInt64(v_0.AuxInt) + x := v_0.Args[0] + if !(is32Bit(c - d)) { + break + } + v.reset(OpPPC64ADDconst) + v.AuxInt = int64ToAuxInt(c - d) + v.AddArg(x) + return true + } + // match: (SUBFCconst [0] x) + // result: (NEG x) + for { + if auxIntToInt64(v.AuxInt) != 0 { + break + } + x := v_0 + v.reset(OpPPC64NEG) + v.AddArg(x) + return true + } return false } func rewriteValuePPC64_OpPPC64XOR(v *Value) bool { @@ -12204,6 +12465,76 @@ func rewriteValuePPC64_OpPPC64XOR(v *Value) bool { } break } + // match: (XOR (SLD x (ANDconst [63] y)) (SRD x (SUBFCconst [64] (ANDconst [63] y)))) + // result: (ROTL x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpPPC64SLD { + continue + } + _ = v_0.Args[1] + x := v_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpPPC64ANDconst || v_0_1.Type != typ.Int64 || auxIntToInt64(v_0_1.AuxInt) != 63 { + continue + } + y := v_0_1.Args[0] + if v_1.Op != OpPPC64SRD { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] { + continue + } + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpPPC64SUBFCconst || v_1_1.Type != typ.UInt || auxIntToInt64(v_1_1.AuxInt) != 64 { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpPPC64ANDconst || v_1_1_0.Type != typ.UInt || auxIntToInt64(v_1_1_0.AuxInt) != 63 || y != v_1_1_0.Args[0] { + continue + } + v.reset(OpPPC64ROTL) + v.AddArg2(x, y) + return true + } + break + } + // match: (XOR (SLW x (ANDconst [31] y)) (SRW x (SUBFCconst [32] (ANDconst [31] y)))) + // result: (ROTLW x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpPPC64SLW { + continue + } + _ = v_0.Args[1] + x := v_0.Args[0] + v_0_1 := v_0.Args[1] + if v_0_1.Op != OpPPC64ANDconst || v_0_1.Type != typ.Int32 || auxIntToInt64(v_0_1.AuxInt) != 31 { + continue + } + y := v_0_1.Args[0] + if v_1.Op != OpPPC64SRW { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] { + continue + } + v_1_1 := v_1.Args[1] + if v_1_1.Op != OpPPC64SUBFCconst || v_1_1.Type != typ.UInt || auxIntToInt64(v_1_1.AuxInt) != 32 { + continue + } + v_1_1_0 := v_1_1.Args[0] + if v_1_1_0.Op != OpPPC64ANDconst || v_1_1_0.Type != typ.UInt || auxIntToInt64(v_1_1_0.AuxInt) != 31 || y != v_1_1_0.Args[0] { + continue + } + v.reset(OpPPC64ROTLW) + v.AddArg2(x, y) + return true + } + break + } // match: (XOR (SLW x (ANDconst [31] y)) (SRW x (SUB (MOVDconst [32]) (ANDconst [31] y)))) // result: (ROTLW x y) for { @@ -13175,6 +13506,28 @@ func rewriteValuePPC64_OpRsh32Ux64(v *Value) bool { v.AddArg2(x, v0) return true } + // match: (Rsh32Ux64 x (SUBFCconst [32] (ANDconst [31] y))) + // result: (SRW x (SUBFCconst [32] (ANDconst [31] y))) + for { + x := v_0 + if v_1.Op != OpPPC64SUBFCconst || v_1.Type != typ.UInt || auxIntToInt64(v_1.AuxInt) != 32 { + break + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpPPC64ANDconst || v_1_0.Type != typ.UInt || auxIntToInt64(v_1_0.AuxInt) != 31 { + break + } + y := v_1_0.Args[0] + v.reset(OpPPC64SRW) + v0 := b.NewValue0(v.Pos, OpPPC64SUBFCconst, typ.UInt) + v0.AuxInt = int64ToAuxInt(32) + v1 := b.NewValue0(v.Pos, 
OpPPC64ANDconst, typ.UInt) + v1.AuxInt = int64ToAuxInt(31) + v1.AddArg(y) + v0.AddArg(v1) + v.AddArg2(x, v0) + return true + } // match: (Rsh32Ux64 x (SUB (MOVDconst [32]) (AND y (MOVDconst [31])))) // result: (SRW x (SUB (MOVDconst [32]) (ANDconst [31] y))) for { @@ -13212,6 +13565,37 @@ func rewriteValuePPC64_OpRsh32Ux64(v *Value) bool { } break } + // match: (Rsh32Ux64 x (SUBFCconst [32] (AND y (MOVDconst [31])))) + // result: (SRW x (SUBFCconst [32] (ANDconst [31] y))) + for { + x := v_0 + if v_1.Op != OpPPC64SUBFCconst || v_1.Type != typ.UInt || auxIntToInt64(v_1.AuxInt) != 32 { + break + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpPPC64AND || v_1_0.Type != typ.UInt { + break + } + _ = v_1_0.Args[1] + v_1_0_0 := v_1_0.Args[0] + v_1_0_1 := v_1_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0_0, v_1_0_1 = _i0+1, v_1_0_1, v_1_0_0 { + y := v_1_0_0 + if v_1_0_1.Op != OpPPC64MOVDconst || auxIntToInt64(v_1_0_1.AuxInt) != 31 { + continue + } + v.reset(OpPPC64SRW) + v0 := b.NewValue0(v.Pos, OpPPC64SUBFCconst, typ.UInt) + v0.AuxInt = int64ToAuxInt(32) + v1 := b.NewValue0(v.Pos, OpPPC64ANDconst, typ.UInt) + v1.AuxInt = int64ToAuxInt(31) + v1.AddArg(y) + v0.AddArg(v1) + v.AddArg2(x, v0) + return true + } + break + } // match: (Rsh32Ux64 x y) // result: (SRW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32])))) for { @@ -13482,6 +13866,28 @@ func rewriteValuePPC64_OpRsh32x64(v *Value) bool { v.AddArg2(x, v0) return true } + // match: (Rsh32x64 x (SUBFCconst [32] (ANDconst [31] y))) + // result: (SRAW x (SUBFCconst [32] (ANDconst [31] y))) + for { + x := v_0 + if v_1.Op != OpPPC64SUBFCconst || v_1.Type != typ.UInt || auxIntToInt64(v_1.AuxInt) != 32 { + break + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpPPC64ANDconst || v_1_0.Type != typ.UInt || auxIntToInt64(v_1_0.AuxInt) != 31 { + break + } + y := v_1_0.Args[0] + v.reset(OpPPC64SRAW) + v0 := b.NewValue0(v.Pos, OpPPC64SUBFCconst, typ.UInt) + v0.AuxInt = int64ToAuxInt(32) + v1 := b.NewValue0(v.Pos, OpPPC64ANDconst, typ.UInt) + v1.AuxInt = int64ToAuxInt(31) + v1.AddArg(y) + v0.AddArg(v1) + v.AddArg2(x, v0) + return true + } // match: (Rsh32x64 x (SUB (MOVDconst [32]) (AND y (MOVDconst [31])))) // result: (SRAW x (SUB (MOVDconst [32]) (ANDconst [31] y))) for { @@ -13519,6 +13925,37 @@ func rewriteValuePPC64_OpRsh32x64(v *Value) bool { } break } + // match: (Rsh32x64 x (SUBFCconst [32] (AND y (MOVDconst [31])))) + // result: (SRAW x (SUBFCconst [32] (ANDconst [31] y))) + for { + x := v_0 + if v_1.Op != OpPPC64SUBFCconst || v_1.Type != typ.UInt || auxIntToInt64(v_1.AuxInt) != 32 { + break + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpPPC64AND || v_1_0.Type != typ.UInt { + break + } + _ = v_1_0.Args[1] + v_1_0_0 := v_1_0.Args[0] + v_1_0_1 := v_1_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0_0, v_1_0_1 = _i0+1, v_1_0_1, v_1_0_0 { + y := v_1_0_0 + if v_1_0_1.Op != OpPPC64MOVDconst || auxIntToInt64(v_1_0_1.AuxInt) != 31 { + continue + } + v.reset(OpPPC64SRAW) + v0 := b.NewValue0(v.Pos, OpPPC64SUBFCconst, typ.UInt) + v0.AuxInt = int64ToAuxInt(32) + v1 := b.NewValue0(v.Pos, OpPPC64ANDconst, typ.UInt) + v1.AuxInt = int64ToAuxInt(31) + v1.AddArg(y) + v0.AddArg(v1) + v.AddArg2(x, v0) + return true + } + break + } // match: (Rsh32x64 x y) // result: (SRAW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32])))) for { @@ -13787,6 +14224,28 @@ func rewriteValuePPC64_OpRsh64Ux64(v *Value) bool { v.AddArg2(x, v0) return true } + // match: (Rsh64Ux64 x (SUBFCconst [64] (ANDconst [63] y))) + // result: (SRD x (SUBFCconst [64] (ANDconst [63] y))) + for { + x := 
v_0 + if v_1.Op != OpPPC64SUBFCconst || v_1.Type != typ.UInt || auxIntToInt64(v_1.AuxInt) != 64 { + break + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpPPC64ANDconst || v_1_0.Type != typ.UInt || auxIntToInt64(v_1_0.AuxInt) != 63 { + break + } + y := v_1_0.Args[0] + v.reset(OpPPC64SRD) + v0 := b.NewValue0(v.Pos, OpPPC64SUBFCconst, typ.UInt) + v0.AuxInt = int64ToAuxInt(64) + v1 := b.NewValue0(v.Pos, OpPPC64ANDconst, typ.UInt) + v1.AuxInt = int64ToAuxInt(63) + v1.AddArg(y) + v0.AddArg(v1) + v.AddArg2(x, v0) + return true + } // match: (Rsh64Ux64 x (SUB (MOVDconst [64]) (AND y (MOVDconst [63])))) // result: (SRD x (SUB (MOVDconst [64]) (ANDconst [63] y))) for { @@ -13824,6 +14283,37 @@ func rewriteValuePPC64_OpRsh64Ux64(v *Value) bool { } break } + // match: (Rsh64Ux64 x (SUBFCconst [64] (AND y (MOVDconst [63])))) + // result: (SRD x (SUBFCconst [64] (ANDconst [63] y))) + for { + x := v_0 + if v_1.Op != OpPPC64SUBFCconst || v_1.Type != typ.UInt || auxIntToInt64(v_1.AuxInt) != 64 { + break + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpPPC64AND || v_1_0.Type != typ.UInt { + break + } + _ = v_1_0.Args[1] + v_1_0_0 := v_1_0.Args[0] + v_1_0_1 := v_1_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0_0, v_1_0_1 = _i0+1, v_1_0_1, v_1_0_0 { + y := v_1_0_0 + if v_1_0_1.Op != OpPPC64MOVDconst || auxIntToInt64(v_1_0_1.AuxInt) != 63 { + continue + } + v.reset(OpPPC64SRD) + v0 := b.NewValue0(v.Pos, OpPPC64SUBFCconst, typ.UInt) + v0.AuxInt = int64ToAuxInt(64) + v1 := b.NewValue0(v.Pos, OpPPC64ANDconst, typ.UInt) + v1.AuxInt = int64ToAuxInt(63) + v1.AddArg(y) + v0.AddArg(v1) + v.AddArg2(x, v0) + return true + } + break + } // match: (Rsh64Ux64 x y) // result: (SRD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64])))) for { @@ -14094,6 +14584,28 @@ func rewriteValuePPC64_OpRsh64x64(v *Value) bool { v.AddArg2(x, v0) return true } + // match: (Rsh64x64 x (SUBFCconst [64] (ANDconst [63] y))) + // result: (SRAD x (SUBFCconst [64] (ANDconst [63] y))) + for { + x := v_0 + if v_1.Op != OpPPC64SUBFCconst || v_1.Type != typ.UInt || auxIntToInt64(v_1.AuxInt) != 64 { + break + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpPPC64ANDconst || v_1_0.Type != typ.UInt || auxIntToInt64(v_1_0.AuxInt) != 63 { + break + } + y := v_1_0.Args[0] + v.reset(OpPPC64SRAD) + v0 := b.NewValue0(v.Pos, OpPPC64SUBFCconst, typ.UInt) + v0.AuxInt = int64ToAuxInt(64) + v1 := b.NewValue0(v.Pos, OpPPC64ANDconst, typ.UInt) + v1.AuxInt = int64ToAuxInt(63) + v1.AddArg(y) + v0.AddArg(v1) + v.AddArg2(x, v0) + return true + } // match: (Rsh64x64 x (SUB (MOVDconst [64]) (AND y (MOVDconst [63])))) // result: (SRAD x (SUB (MOVDconst [64]) (ANDconst [63] y))) for { @@ -14131,6 +14643,37 @@ func rewriteValuePPC64_OpRsh64x64(v *Value) bool { } break } + // match: (Rsh64x64 x (SUBFCconst [64] (AND y (MOVDconst [63])))) + // result: (SRAD x (SUBFCconst [64] (ANDconst [63] y))) + for { + x := v_0 + if v_1.Op != OpPPC64SUBFCconst || v_1.Type != typ.UInt || auxIntToInt64(v_1.AuxInt) != 64 { + break + } + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpPPC64AND || v_1_0.Type != typ.UInt { + break + } + _ = v_1_0.Args[1] + v_1_0_0 := v_1_0.Args[0] + v_1_0_1 := v_1_0.Args[1] + for _i0 := 0; _i0 <= 1; _i0, v_1_0_0, v_1_0_1 = _i0+1, v_1_0_1, v_1_0_0 { + y := v_1_0_0 + if v_1_0_1.Op != OpPPC64MOVDconst || auxIntToInt64(v_1_0_1.AuxInt) != 63 { + continue + } + v.reset(OpPPC64SRAD) + v0 := b.NewValue0(v.Pos, OpPPC64SUBFCconst, typ.UInt) + v0.AuxInt = int64ToAuxInt(64) + v1 := b.NewValue0(v.Pos, OpPPC64ANDconst, typ.UInt) + v1.AuxInt = int64ToAuxInt(63) + v1.AddArg(y) + 
v0.AddArg(v1) + v.AddArg2(x, v0) + return true + } + break + } // match: (Rsh64x64 x y) // result: (SRAD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64])))) for { diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go index afd4d66bd9..0bdb66a376 100644 --- a/test/codegen/arithmetic.go +++ b/test/codegen/arithmetic.go @@ -42,6 +42,48 @@ func SubMem(arr []int, b, c, d int) int { return arr[0] - arr[1] } +func SubFromConst(a int) int { + // ppc64le: `SUBC\tR[0-9]+,\s[$]40,\sR` + // ppc64: `SUBC\tR[0-9]+,\s[$]40,\sR` + b := 40 - a + return b +} + +func SubFromConstNeg(a int) int { + // ppc64le: `ADD\t[$]40,\sR[0-9]+,\sR` + // ppc64: `ADD\t[$]40,\sR[0-9]+,\sR` + c := 40 - (-a) + return c +} + +func SubSubFromConst(a int) int { + // ppc64le: `ADD\t[$]20,\sR[0-9]+,\sR` + // ppc64: `ADD\t[$]20,\sR[0-9]+,\sR` + c := 40 - (20 - a) + return c +} + +func AddSubFromConst(a int) int { + // ppc64le: `SUBC\tR[0-9]+,\s[$]60,\sR` + // ppc64: `SUBC\tR[0-9]+,\s[$]60,\sR` + c := 40 + (20 - a) + return c +} + +func NegSubFromConst(a int) int { + // ppc64le: `ADD\t[$]-20,\sR[0-9]+,\sR` + // ppc64: `ADD\t[$]-20,\sR[0-9]+,\sR` + c := -(20 - a) + return c +} + +func NegAddFromConstNeg(a int) int { + // ppc64le: `SUBC\tR[0-9]+,\s[$]40,\sR` + // ppc64: `SUBC\tR[0-9]+,\s[$]40,\sR` + c := -(-40 + a) + return c +} + // -------------------- // // Multiplication // // -------------------- // diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index 942605de55..4c35f26997 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -76,9 +76,17 @@ func Len64(n uint64) int { // arm:"CLZ" arm64:"CLZ" // mips:"CLZ" // wasm:"I64Clz" + // ppc64le:"SUBC","CNTLZD" + // ppc64:"SUBC","CNTLZD" return bits.Len64(n) } +func SubFromLen64(n uint64) int { + // ppc64le:"CNTLZD",-"SUBC" + // ppc64:"CNTLZD",-"SUBC" + return 64 - bits.Len64(n) +} + func Len32(n uint32) int { // amd64:"BSRQ","LEAQ",-"CMOVQEQ" // s390x:"FLOGR" @@ -291,6 +299,12 @@ func TrailingZeros64(n uint64) int { return bits.TrailingZeros64(n) } +func TrailingZeros64Subtract(n uint64) int { + // ppc64le/power8:"NEG","SUBC","ANDN","POPCNTD" + // ppc64le/power9:"SUBC","CNTTZD" + return bits.TrailingZeros64(1 - n) +} + func TrailingZeros32(n uint32) int { // amd64:"BTSQ\\t\\$32","BSFQ" // arm:"CLZ" diff --git a/test/codegen/slices.go b/test/codegen/slices.go index 40e857f9f6..38e8a62f4b 100644 --- a/test/codegen/slices.go +++ b/test/codegen/slices.go @@ -347,3 +347,24 @@ func InitNotSmallSliceLiteral() []int { 42, } } + +// --------------------------------------- // +// Test PPC64 SUBFCconst folding rules // +// triggered by slice operations. // +// --------------------------------------- // + +func SliceWithConstCompare(a []int, b int) []int { + var c []int = []int{1, 2, 3, 4, 5} + if b+len(a) < len(c) { + // ppc64le:-"NEG" + // ppc64:-"NEG" + return c[b:] + } + return a +} + +func SliceWithSubtractBound(a []int, b int) []int { + // ppc64le:"SUBC",-"NEG" + // ppc64:"SUBC",-"NEG" + return a[(3 - b):] +} -- 2.48.1
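
For readers unfamiliar with the transformation, here is a minimal illustrative sketch (not part of the patch) mirroring the SubFromConst codegen test above. The instruction sequences and register numbers in the comments are assumptions chosen for explanation only; the patch itself only guarantees the SUBC form matched by the test regexps.

// Before this change, 40 - a lowered to a constant load plus a subtract,
// roughly:
//     MOVD $40, R4        // materialize the constant
//     SUB  R3, R4, R3     // R3 = R4 - R3 = 40 - a
// With the new (SUB (MOVDconst [c]) x) => (SUBFCconst [c] x) rule it becomes
// a single subtract-from-immediate (subfic), spelled SUBC in Go assembly:
//     SUBC R3, $40, R3    // R3 = 40 - R3; clobbers the carry bit, which is
//                         // unused by generated code per the commit message
func subFromConst(a int64) int64 {
	return 40 - a
}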