Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: support float min/max instructions on PPC64
author Paul E. Murphy <murp@ibm.com>
Fri, 22 Mar 2024 16:41:58 +0000 (11:41 -0500)
committer Paul Murphy <murp@ibm.com>
Mon, 1 Apr 2024 18:50:29 +0000 (18:50 +0000)
This enables efficient use of the builtin min/max functions
for float64 and float32 types on GOPPC64 >= power9.

Extend the assembler to support xsminjdp/xsmaxjdp and use
them to implement float min/max.

Simplify the VSX xx3 opcode rules to allow FPR arguments
when all arguments are FPRs.

Change-Id: I15882a4ce5dc46eba71d683cf1d184dc4236a328
Reviewed-on: https://go-review.googlesource.com/c/go/+/574535
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Paul Murphy <murp@ibm.com>
Reviewed-by: Than McIntosh <thanm@google.com>
src/cmd/asm/internal/asm/testdata/ppc64.s
src/cmd/compile/internal/ppc64/ssa.go
src/cmd/compile/internal/ssa/_gen/PPC64.rules
src/cmd/compile/internal/ssa/_gen/PPC64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewritePPC64.go
src/cmd/compile/internal/ssagen/ssa.go
src/cmd/internal/obj/ppc64/a.out.go
src/cmd/internal/obj/ppc64/anames.go
src/cmd/internal/obj/ppc64/asm9.go
test/codegen/floats.go

index 57060a3c107a6367dcd4f2ba756035dcd1b4ff95..fc56a9530a03f072dd84c29d1d2e70910478fcfa 100644 (file)
@@ -1133,7 +1133,10 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
        PNOP                            // 0700000000000000
 
        SETB CR1,R3                     // 7c640100
-       VCLZLSBB V1, R2                 // 10400e02
-       VCTZLSBB V1, R2                 // 10410e02
+       VCLZLSBB V1,R2                  // 10400e02
+       VCTZLSBB V1,R2                  // 10410e02
+
+       XSMAXJDP VS1,VS2,VS3            // f0611480
+       XSMINJDP VS1,VS2,VS3            // f06114c0
 
        RET
index cb030ed2b0d82a6649f4e20361858319b8de93d1..db420b7cb4ec2cde5aaa12b43617c66505c45c74 100644 (file)
@@ -582,7 +582,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
                ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
                ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
-               ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
+               ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW, ssa.OpPPC64XSMINJDP, ssa.OpPPC64XSMAXJDP:
                r := v.Reg()
                r1 := v.Args[0].Reg()
                r2 := v.Args[1].Reg()
index 0b69f5cda9fc23f525fc6ed3fa19830fe90a29bc..75181191476212f235088cbc7a7a871bd9881571 100644 (file)
@@ -14,6 +14,9 @@
 (Sub32F ...) => (FSUBS ...)
 (Sub64F ...) => (FSUB ...)
 
+(Min(32|64)F x y) && buildcfg.GOPPC64 >= 9 => (XSMINJDP x y)
+(Max(32|64)F x y) && buildcfg.GOPPC64 >= 9 => (XSMAXJDP x y)
+
 // Combine 64 bit integer multiply and adds
 (ADD l:(MULLD x y) z) && buildcfg.GOPPC64 >= 9 && l.Uses == 1 && clobber(l) => (MADDLD x y z)
 
index c66413bb0ac951f20f1c36fe2973b5cffa544f18..7f0ee9ab91da4da70d54d1bced0340bef1a89dd8 100644 (file)
@@ -189,6 +189,10 @@ func init() {
                {name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"},                                               // arg0-arg1
                {name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"},                                             // arg0-arg1
 
+               // Note, the FPU works with float64 in register.
+               {name: "XSMINJDP", argLength: 2, reg: fp21, asm: "XSMINJDP"}, // fmin(arg0,arg1)
+               {name: "XSMAXJDP", argLength: 2, reg: fp21, asm: "XSMAXJDP"}, // fmax(arg0,arg1)
+
                {name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
                {name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
                {name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
index ab106f2c6d6822f7b72f631aff0e4bedaa87e14a..429c2143950cc028892ba12283d9e24772e3a88c 100644 (file)
@@ -2116,6 +2116,8 @@ const (
        OpPPC64SUBFCconst
        OpPPC64FSUB
        OpPPC64FSUBS
+       OpPPC64XSMINJDP
+       OpPPC64XSMAXJDP
        OpPPC64MULLD
        OpPPC64MULLW
        OpPPC64MULLDconst
@@ -28397,6 +28399,34 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "XSMINJDP",
+               argLen: 2,
+               asm:    ppc64.AXSMINJDP,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
+                               {1, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
+                       },
+                       outputs: []outputInfo{
+                               {0, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
+                       },
+               },
+       },
+       {
+               name:   "XSMAXJDP",
+               argLen: 2,
+               asm:    ppc64.AXSMAXJDP,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
+                               {1, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
+                       },
+                       outputs: []outputInfo{
+                               {0, 9223372032559808512}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30
+                       },
+               },
+       },
        {
                name:        "MULLD",
                argLen:      2,
index d530837ab7e4b695de135927f38bd7f3036c93d2..4ac5eec073520d7fb9d663e8096fa675569c9643 100644 (file)
@@ -340,6 +340,14 @@ func rewriteValuePPC64(v *Value) bool {
                return rewriteValuePPC64_OpLsh8x64(v)
        case OpLsh8x8:
                return rewriteValuePPC64_OpLsh8x8(v)
+       case OpMax32F:
+               return rewriteValuePPC64_OpMax32F(v)
+       case OpMax64F:
+               return rewriteValuePPC64_OpMax64F(v)
+       case OpMin32F:
+               return rewriteValuePPC64_OpMin32F(v)
+       case OpMin64F:
+               return rewriteValuePPC64_OpMin64F(v)
        case OpMod16:
                return rewriteValuePPC64_OpMod16(v)
        case OpMod16u:
@@ -3296,6 +3304,78 @@ func rewriteValuePPC64_OpLsh8x8(v *Value) bool {
                return true
        }
 }
+func rewriteValuePPC64_OpMax32F(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (Max32F x y)
+       // cond: buildcfg.GOPPC64 >= 9
+       // result: (XSMAXJDP x y)
+       for {
+               x := v_0
+               y := v_1
+               if !(buildcfg.GOPPC64 >= 9) {
+                       break
+               }
+               v.reset(OpPPC64XSMAXJDP)
+               v.AddArg2(x, y)
+               return true
+       }
+       return false
+}
+func rewriteValuePPC64_OpMax64F(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (Max64F x y)
+       // cond: buildcfg.GOPPC64 >= 9
+       // result: (XSMAXJDP x y)
+       for {
+               x := v_0
+               y := v_1
+               if !(buildcfg.GOPPC64 >= 9) {
+                       break
+               }
+               v.reset(OpPPC64XSMAXJDP)
+               v.AddArg2(x, y)
+               return true
+       }
+       return false
+}
+func rewriteValuePPC64_OpMin32F(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (Min32F x y)
+       // cond: buildcfg.GOPPC64 >= 9
+       // result: (XSMINJDP x y)
+       for {
+               x := v_0
+               y := v_1
+               if !(buildcfg.GOPPC64 >= 9) {
+                       break
+               }
+               v.reset(OpPPC64XSMINJDP)
+               v.AddArg2(x, y)
+               return true
+       }
+       return false
+}
+func rewriteValuePPC64_OpMin64F(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (Min64F x y)
+       // cond: buildcfg.GOPPC64 >= 9
+       // result: (XSMINJDP x y)
+       for {
+               x := v_0
+               y := v_1
+               if !(buildcfg.GOPPC64 >= 9) {
+                       break
+               }
+               v.reset(OpPPC64XSMINJDP)
+               v.AddArg2(x, y)
+               return true
+       }
+       return false
+}
 func rewriteValuePPC64_OpMod16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
index 37d6165e42bcc4b5178df1e6ccea192b816ed680..59b4c8808921f9fb06c50b216aa2fe25f1846e11 100644 (file)
@@ -3698,8 +3698,15 @@ func (s *state) minMax(n *ir.CallExpr) *ssa.Value {
                // string comparisons during walk, not ssagen.
 
                if typ.IsFloat() {
+                       hasIntrinsic := false
                        switch Arch.LinkArch.Family {
                        case sys.AMD64, sys.ARM64, sys.RISCV64:
+                               hasIntrinsic = true
+                       case sys.PPC64:
+                               hasIntrinsic = buildcfg.GOPPC64 >= 9
+                       }
+
+                       if hasIntrinsic {
                                var op ssa.Op
                                switch {
                                case typ.Kind() == types.TFLOAT64 && n.Op() == ir.OMIN:
index ab1b4eb19f0b6e535bd8e20d752eb44ab21a2270..3782af29182618aefd2a18a12a893b318d85927c 100644 (file)
@@ -1074,6 +1074,8 @@ const (
        AXVCVSXWSP
        AXVCVUXDSP
        AXVCVUXWSP
+       AXSMAXJDP
+       AXSMINJDP
        ALASTAOUT // The last instruction in this list. Also the first opcode generated by ppc64map.
 
        // aliases
index f4680cc368a37f3dd61687e6af93fd6fde14417a..1cf41b83070ec4cbe80248f02347e01b20fa5bf6 100644 (file)
@@ -610,5 +610,7 @@ var Anames = []string{
        "XVCVSXWSP",
        "XVCVUXDSP",
        "XVCVUXWSP",
+       "XSMAXJDP",
+       "XSMINJDP",
        "LASTAOUT",
 }
index 2793600cd0c582fdff01c095007a44590d2f30fe..d9b7c2eed3f5c0a4fb4c87378d1c54499f657385 100644 (file)
@@ -429,9 +429,9 @@ var optabBase = []Optab{
        {as: AMTVSRD, a1: C_REG, a6: C_FREG, type_: 104, size: 4},
        {as: AMTVSRDD, a1: C_REG, a2: C_REG, a6: C_VSREG, type_: 104, size: 4},
 
-       /* VSX logical */
-       {as: AXXLAND, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx and, xx3-form */
-       {as: AXXLOR, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4},  /* vsx or, xx3-form */
+       /* VSX xx3-form */
+       {as: AXXLAND, a1: C_FREG, a2: C_FREG, a6: C_FREG, type_: 90, size: 4},    /* vsx xx3-form (FPR usage) */
+       {as: AXXLAND, a1: C_VSREG, a2: C_VSREG, a6: C_VSREG, type_: 90, size: 4}, /* vsx xx3-form */
 
        /* VSX select */
        {as: AXXSEL, a1: C_VSREG, a2: C_VSREG, a3: C_VSREG, a6: C_VSREG, type_: 91, size: 4}, /* vsx select, xx4-form */
@@ -1679,16 +1679,17 @@ func buildop(ctxt *obj.Link) {
                        opset(AMTVSRWZ, r0)
                        opset(AMTVSRWS, r0)
 
-               case AXXLAND: /* xxland, xxlandc, xxleqv, xxlnand */
+               case AXXLAND:
                        opset(AXXLANDC, r0)
                        opset(AXXLEQV, r0)
                        opset(AXXLNAND, r0)
-
-               case AXXLOR: /* xxlorc, xxlnor, xxlor, xxlxor */
                        opset(AXXLORC, r0)
                        opset(AXXLNOR, r0)
                        opset(AXXLORQ, r0)
                        opset(AXXLXOR, r0)
+                       opset(AXXLOR, r0)
+                       opset(AXSMAXJDP, r0)
+                       opset(AXSMINJDP, r0)
 
                case AXXSEL: /* xxsel */
                        opset(AXXSEL, r0)
@@ -4769,6 +4770,10 @@ func (c *ctxt9) oprrr(a obj.As) uint32 {
                return OPVXX3(60, 146, 0) /* xxlor - v2.06 */
        case AXXLXOR:
                return OPVXX3(60, 154, 0) /* xxlxor - v2.06 */
+       case AXSMINJDP:
+               return OPVXX3(60, 152, 0) /* xsminjdp - v3.0 */
+       case AXSMAXJDP:
+               return OPVXX3(60, 144, 0) /* xsmaxjdp - v3.0 */
 
        case AXXSEL:
                return OPVXX4(60, 3, 0) /* xxsel - v2.06 */
index 54dc87ecfdbfad99ae1717d10d264a6dc71340aa..d5c5475567850dd2c00faf66b88015b6be6b06fa 100644 (file)
@@ -165,6 +165,8 @@ func Float64Min(a, b float64) float64 {
        // amd64:"MINSD"
        // arm64:"FMIND"
        // riscv64:"FMIN"
+       // ppc64/power9:"XSMINJDP"
+       // ppc64/power10:"XSMINJDP"
        return min(a, b)
 }
 
@@ -172,6 +174,8 @@ func Float64Max(a, b float64) float64 {
        // amd64:"MINSD"
        // arm64:"FMAXD"
        // riscv64:"FMAX"
+       // ppc64/power9:"XSMAXJDP"
+       // ppc64/power10:"XSMAXJDP"
        return max(a, b)
 }
 
@@ -179,6 +183,8 @@ func Float32Min(a, b float32) float32 {
        // amd64:"MINSS"
        // arm64:"FMINS"
        // riscv64:"FMINS"
+       // ppc64/power9:"XSMINJDP"
+       // ppc64/power10:"XSMINJDP"
        return min(a, b)
 }
 
@@ -186,5 +192,7 @@ func Float32Max(a, b float32) float32 {
        // amd64:"MINSS"
        // arm64:"FMAXS"
        // riscv64:"FMAXS"
+       // ppc64/power9:"XSMAXJDP"
+       // ppc64/power10:"XSMAXJDP"
        return max(a, b)
 }