cmd/compile: add floating point min/max intrinsics on s390x

author Michael Munday <mndygolang+git@gmail.com>

Fri, 27 Jun 2025 20:05:38 +0000 (21:05 +0100)

committer Michael Munday <mndygolang+git@gmail.com>

Wed, 30 Jul 2025 19:29:15 +0000 (12:29 -0700)
author Michael Munday <mndygolang+git@gmail.com>
Fri, 27 Jun 2025 20:05:38 +0000 (21:05 +0100)
committer Michael Munday <mndygolang+git@gmail.com>
Wed, 30 Jul 2025 19:29:15 +0000 (12:29 -0700)
diff --git a/src/cmd/asm/internal/asm/testdata/s390x.s b/src/cmd/asm/internal/asm/testdata/s390x.s

index a19292b263640190ccb7972ddd33dcb249194070..93c3ec9ea7f22ed9e97e23310987a872592ad016 100644 (file)
--- a/src/cmd/asm/internal/asm/testdata/s390x.s
+++ b/src/cmd/asm/internal/asm/testdata/s390x.s
@@ -540,6 +540,18 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16-
         VSTRCZBS V18, V20, V22, V24     // e78240306f8a
         VSTRCZHS V18, V20, V22, V24     // e78241306f8a
         VSTRCZFS V18, V20, V22, V24     // e78242306f8a
+       VFMAXSB $1, V2, V3, V4          // e742301020ef
+       WFMAXSB $2, V5, V6, V7          // e775602820ef
+       WFMAXSB $2, F5, F6, F7          // e775602820ef
+       VFMAXDB $3, V8, V9, V10         // e7a8903030ef
+       WFMAXDB $4, V11, V12, V13       // e7dbc04830ef
+       WFMAXDB $4, F11, F12, F13       // e7dbc04830ef
+       VFMINSB $7, V14, V15, V16       // e70ef07028ee
+       WFMINSB $8, V17, V18, V19       // e73120882eee
+       WFMINSB $8, F1, F2, F3          // e731208820ee
+       VFMINDB $9, V20, V21, V22       // e76450903eee
+       WFMINDB $10, V23, V24, V25      // e79780a83eee
+       WFMINDB $10, F7, F8, F9         // e79780a830ee
  
         RET
         RET     foo(SB)
diff --git a/src/cmd/compile/internal/s390x/ssa.go b/src/cmd/compile/internal/s390x/ssa.go

index 4d24881dbaf5d59911b795211f51b2708c3f39be..ad66bfb5d8533423c6692488bfb6a0c3fd021bcf 100644 (file)
--- a/src/cmd/compile/internal/s390x/ssa.go
+++ b/src/cmd/compile/internal/s390x/ssa.go
@@ -281,6 +281,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
         case ssa.OpS390XCPSDR:
                 p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
                 p.Reg = v.Args[0].Reg()
+       case ssa.OpS390XWFMAXDB, ssa.OpS390XWFMAXSB,
+               ssa.OpS390XWFMINDB, ssa.OpS390XWFMINSB:
+               p := opregregimm(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg(), 1 /* Java Math.Max() / Math.Min() semantics */)
+               p.AddRestSource(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()})
         case ssa.OpS390XDIVD, ssa.OpS390XDIVW,
                 ssa.OpS390XDIVDU, ssa.OpS390XDIVWU,
                 ssa.OpS390XMODD, ssa.OpS390XMODW,
diff --git a/src/cmd/compile/internal/ssa/_gen/S390X.rules b/src/cmd/compile/internal/ssa/_gen/S390X.rules

index 231ad0615dd96e891b141214b9a6a31d69900cce..80e12f8e29d6d2ee21739a1fb4ff23d882dbefcf 100644 (file)
--- a/src/cmd/compile/internal/ssa/_gen/S390X.rules
+++ b/src/cmd/compile/internal/ssa/_gen/S390X.rules
@@ -145,6 +145,9 @@
  
  (Sqrt32    ...) => (FSQRTS ...)
  
+(Max(64|32)F ...) => (WFMAX(D|S)B ...)
+(Min(64|32)F ...) => (WFMIN(D|S)B ...)
+
  // Atomic loads and stores.
  // The SYNC instruction (fast-BCR-serialization) prevents store-load
  // reordering. Other sequences of memory operations (load-load,
diff --git a/src/cmd/compile/internal/ssa/_gen/S390XOps.go b/src/cmd/compile/internal/ssa/_gen/S390XOps.go

index 2f57d12630d5d42154625377a15007f481e492ba..38fb3cb0748932c7c1181e7f90bc5a8ee3ce7adc 100644 (file)
--- a/src/cmd/compile/internal/ssa/_gen/S390XOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/S390XOps.go
@@ -222,6 +222,12 @@ func init() {
                 {name: "LNDFR", argLength: 1, reg: fp11, asm: "LNDFR"},                                                                       // fp64/fp32 clear sign bit
                 {name: "CPSDR", argLength: 2, reg: fp21, asm: "CPSDR"},                                                                       // fp64/fp32 copy arg1 sign bit to arg0
  
+               // Single element vector floating point min / max instructions
+               {name: "WFMAXDB", argLength: 2, reg: fp21, asm: "WFMAXDB", typ: "Float64"}, // max[float64](arg0, arg1)
+               {name: "WFMAXSB", argLength: 2, reg: fp21, asm: "WFMAXSB", typ: "Float32"}, // max[float32](arg0, arg1)
+               {name: "WFMINDB", argLength: 2, reg: fp21, asm: "WFMINDB", typ: "Float64"}, // min[float64](arg0, arg1)
+               {name: "WFMINSB", argLength: 2, reg: fp21, asm: "WFMINSB", typ: "Float32"}, // min[float32](arg0, arg1)
+
                 // Round to integer, float64 only.
                 //
                 // aux | rounding mode
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go

index e88af66f5fdbd97ea3117e6b71e6376fdc9f1074..36c1815ea2ea7dcf100472b320020044eef96873 100644 (file)
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -2655,6 +2655,10 @@ const (
         OpS390XLPDFR
         OpS390XLNDFR
         OpS390XCPSDR
+       OpS390XWFMAXDB
+       OpS390XWFMAXSB
+       OpS390XWFMINDB
+       OpS390XWFMINSB
         OpS390XFIDBR
         OpS390XFMOVSload
         OpS390XFMOVDload
@@ -35775,6 +35779,62 @@ var opcodeTable = [...]opInfo{
                         },
                 },
         },
+       {
+               name:   "WFMAXDB",
+               argLen: 2,
+               asm:    s390x.AWFMAXDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+                       outputs: []outputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+               },
+       },
+       {
+               name:   "WFMAXSB",
+               argLen: 2,
+               asm:    s390x.AWFMAXSB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+                       outputs: []outputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+               },
+       },
+       {
+               name:   "WFMINDB",
+               argLen: 2,
+               asm:    s390x.AWFMINDB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+                       outputs: []outputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+               },
+       },
+       {
+               name:   "WFMINSB",
+               argLen: 2,
+               asm:    s390x.AWFMINSB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                               {1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+                       outputs: []outputInfo{
+                               {0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
+                       },
+               },
+       },
         {
                 name:    "FIDBR",
                 auxType: auxInt8,
diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go

index 2e7492501a803adef4ca0ffba5be535a5869daf1..a7fde81c4789b2f2749adeb6db3a79ea9dcddae8 100644 (file)
--- a/src/cmd/compile/internal/ssa/rewriteS390X.go
+++ b/src/cmd/compile/internal/ssa/rewriteS390X.go
@@ -368,6 +368,18 @@ func rewriteValueS390X(v *Value) bool {
                 return rewriteValueS390X_OpLsh8x64(v)
         case OpLsh8x8:
                 return rewriteValueS390X_OpLsh8x8(v)
+       case OpMax32F:
+               v.Op = OpS390XWFMAXSB
+               return true
+       case OpMax64F:
+               v.Op = OpS390XWFMAXDB
+               return true
+       case OpMin32F:
+               v.Op = OpS390XWFMINSB
+               return true
+       case OpMin64F:
+               v.Op = OpS390XWFMINDB
+               return true
         case OpMod16:
                 return rewriteValueS390X_OpMod16(v)
         case OpMod16u:
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go

index 3deb0ecf2336a1ff8444a141c842115c7262e50c..bce94d35f94841f4a09b32c148e3331030651e5b 100644 (file)
--- a/src/cmd/compile/internal/ssagen/ssa.go
+++ b/src/cmd/compile/internal/ssagen/ssa.go
@@ -3986,7 +3986,7 @@ func (s *state) minMax(n *ir.CallExpr) *ssa.Value {
                 if typ.IsFloat() {
                         hasIntrinsic := false
                         switch Arch.LinkArch.Family {
-                       case sys.AMD64, sys.ARM64, sys.Loong64, sys.RISCV64:
+                       case sys.AMD64, sys.ARM64, sys.Loong64, sys.RISCV64, sys.S390X:
                                 hasIntrinsic = true
                         case sys.PPC64:
                                 hasIntrinsic = buildcfg.GOPPC64 >= 9
diff --git a/src/cmd/internal/obj/s390x/a.out.go b/src/cmd/internal/obj/s390x/a.out.go

index 1a64370efa87673d6d508e4d520d00a89e11e78e..dc715182f5cf81b86e164c6172c7db3970c634f0 100644 (file)
--- a/src/cmd/internal/obj/s390x/a.out.go
+++ b/src/cmd/internal/obj/s390x/a.out.go
@@ -715,6 +715,14 @@ const (
         AWFLNDB
         AVFLPDB
         AWFLPDB
+       AVFMAXDB
+       AWFMAXDB
+       AVFMAXSB
+       AWFMAXSB
+       AVFMINDB
+       AWFMINDB
+       AVFMINSB
+       AWFMINSB
         AVFSQ
         AVFSQDB
         AWFSQDB
diff --git a/src/cmd/internal/obj/s390x/anames.go b/src/cmd/internal/obj/s390x/anames.go

index c0a0c401fa09a061008def835f0d848cd14d472d..a6f2820f85d5244b4bbc971e8fa27c7e7953a92f 100644 (file)
--- a/src/cmd/internal/obj/s390x/anames.go
+++ b/src/cmd/internal/obj/s390x/anames.go
@@ -438,6 +438,14 @@ var Anames = []string{
         "WFLNDB",
         "VFLPDB",
         "WFLPDB",
+       "VFMAXDB",
+       "WFMAXDB",
+       "VFMAXSB",
+       "WFMAXSB",
+       "VFMINDB",
+       "WFMINDB",
+       "VFMINSB",
+       "WFMINSB",
         "VFSQ",
         "VFSQDB",
         "WFSQDB",
diff --git a/src/cmd/internal/obj/s390x/asmz.go b/src/cmd/internal/obj/s390x/asmz.go

index 72d92abbaf2a26807bfe2e34ce8327014984139a..957222a1559388540db306c37a4ca04e51bfe90f 100644 (file)
--- a/src/cmd/internal/obj/s390x/asmz.go
+++ b/src/cmd/internal/obj/s390x/asmz.go
@@ -441,6 +441,11 @@ var optab = []Optab{
         {i: 119, as: AVERLLVG, a1: C_VREG, a2: C_VREG, a6: C_VREG},
         {i: 119, as: AVERLLVG, a1: C_VREG, a6: C_VREG},
  
+       // VRR-c floating point min/max
+       {i: 128, as: AVFMAXDB, a1: C_SCON, a2: C_VREG, a3: C_VREG, a6: C_VREG},
+       {i: 128, as: AWFMAXDB, a1: C_SCON, a2: C_VREG, a3: C_VREG, a6: C_VREG},
+       {i: 128, as: AWFMAXDB, a1: C_SCON, a2: C_FREG, a3: C_FREG, a6: C_FREG},
+
         // VRR-d
         {i: 120, as: AVACQ, a1: C_VREG, a2: C_VREG, a3: C_VREG, a6: C_VREG},
  
@@ -1480,6 +1485,14 @@ func buildop(ctxt *obj.Link) {
                         opset(AVFMSDB, r)
                         opset(AWFMSDB, r)
                         opset(AVPERM, r)
+               case AVFMAXDB:
+                       opset(AVFMAXSB, r)
+                       opset(AVFMINDB, r)
+                       opset(AVFMINSB, r)
+               case AWFMAXDB:
+                       opset(AWFMAXSB, r)
+                       opset(AWFMINDB, r)
+                       opset(AWFMINSB, r)
                 case AKM:
                         opset(AKMC, r)
                         opset(AKLMD, r)
@@ -2636,6 +2649,8 @@ const (
         op_VUPLL  uint32 = 0xE7D4 //    VRR-a   VECTOR UNPACK LOGICAL LOW
         op_VUPL   uint32 = 0xE7D6 //    VRR-a   VECTOR UNPACK LOW
         op_VMSL   uint32 = 0xE7B8 //    VRR-d   VECTOR MULTIPLY SUM LOGICAL
+       op_VFMAX  uint32 = 0xE7EF //    VRR-c   VECTOR FP MAXIMUM
+       op_VFMIN  uint32 = 0xE7EE //    VRR-c   VECTOR FP MINIMUM
  
         // added in z15
         op_KDSA uint32 = 0xB93A // FORMAT_RRE        COMPUTE DIGITAL SIGNATURE AUTHENTICATION (KDSA)
@@ -4475,6 +4490,12 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) {
                         c.ctxt.Diag("padding byte register cannot be same as input or output register %v", p)
                 }
                 zRS(op_MVCLE, uint32(p.To.Reg), uint32(p.Reg), uint32(p.From.Reg), uint32(d2), asm)
+
+       case 128: // VRR-c floating point max/min
+               op, m4, _ := vop(p.As)
+               m5 := singleElementMask(p.As)
+               m6 := uint32(c.vregoff(&p.From))
+               zVRRc(op, uint32(p.To.Reg), uint32(p.Reg), uint32(p.GetFrom3().Reg), m6, m5, m4, asm)
         }
  }
  
diff --git a/src/cmd/internal/obj/s390x/vector.go b/src/cmd/internal/obj/s390x/vector.go

index e7e36eaf15abd61ad64161d9fa86f48d58c616d2..966cd04c277c387f99462a08f16f50f4fde75a9c 100644 (file)
--- a/src/cmd/internal/obj/s390x/vector.go
+++ b/src/cmd/internal/obj/s390x/vector.go
@@ -1027,6 +1027,22 @@ func vop(as obj.As) (opcode, es, cs uint32) {
                 return op_VUPL, 1, 0
         case AVUPLF:
                 return op_VUPL, 2, 0
+       case AVFMAXDB:
+               return op_VFMAX, 3, 0
+       case AWFMAXDB:
+               return op_VFMAX, 3, 0
+       case AVFMAXSB:
+               return op_VFMAX, 2, 0
+       case AWFMAXSB:
+               return op_VFMAX, 2, 0
+       case AVFMINDB:
+               return op_VFMIN, 3, 0
+       case AWFMINDB:
+               return op_VFMIN, 3, 0
+       case AVFMINSB:
+               return op_VFMIN, 2, 0
+       case AWFMINSB:
+               return op_VFMIN, 2, 0
         }
  }
  
@@ -1062,7 +1078,11 @@ func singleElementMask(as obj.As) uint32 {
                 AWFSQDB,
                 AWFSDB,
                 AWFTCIDB,
-               AWFIDB:
+               AWFIDB,
+               AWFMAXDB,
+               AWFMAXSB,
+               AWFMINDB,
+               AWFMINSB:
                 return 8
         }
         return 0
diff --git a/test/codegen/floats.go b/test/codegen/floats.go

index d04202d39423d14115d1fb071b1081d6bd36bc2e..0cee49727958e5a21847b7115f44a594c8d79d2c 100644 (file)
--- a/test/codegen/floats.go
+++ b/test/codegen/floats.go
@@ -172,6 +172,7 @@ func Float64Min(a, b float64) float64 {
         // riscv64:"FMIN"
         // ppc64/power9:"XSMINJDP"
         // ppc64/power10:"XSMINJDP"
+       // s390x: "WFMINDB"
         return min(a, b)
  }
  
@@ -182,6 +183,7 @@ func Float64Max(a, b float64) float64 {
         // riscv64:"FMAX"
         // ppc64/power9:"XSMAXJDP"
         // ppc64/power10:"XSMAXJDP"
+       // s390x: "WFMAXDB"
         return max(a, b)
  }
  
@@ -192,6 +194,7 @@ func Float32Min(a, b float32) float32 {
         // riscv64:"FMINS"
         // ppc64/power9:"XSMINJDP"
         // ppc64/power10:"XSMINJDP"
+       // s390x: "WFMINSB"
         return min(a, b)
  }
  
@@ -202,6 +205,7 @@ func Float32Max(a, b float32) float32 {
         // riscv64:"FMAXS"
         // ppc64/power9:"XSMAXJDP"
         // ppc64/power10:"XSMAXJDP"
+       // s390x: "WFMAXSB"
         return max(a, b)
  }
author	Michael Munday <mndygolang+git@gmail.com>
	Fri, 27 Jun 2025 20:05:38 +0000 (21:05 +0100)
committer	Michael Munday <mndygolang+git@gmail.com>
	Wed, 30 Jul 2025 19:29:15 +0000 (12:29 -0700)
src/cmd/asm/internal/asm/testdata/s390x.s		patch \| blob \| history
src/cmd/compile/internal/s390x/ssa.go		patch \| blob \| history
src/cmd/compile/internal/ssa/_gen/S390X.rules		patch \| blob \| history
src/cmd/compile/internal/ssa/_gen/S390XOps.go		patch \| blob \| history
src/cmd/compile/internal/ssa/opGen.go		patch \| blob \| history
src/cmd/compile/internal/ssa/rewriteS390X.go		patch \| blob \| history
src/cmd/compile/internal/ssagen/ssa.go		patch \| blob \| history
src/cmd/internal/obj/s390x/a.out.go		patch \| blob \| history
src/cmd/internal/obj/s390x/anames.go		patch \| blob \| history
src/cmd/internal/obj/s390x/asmz.go		patch \| blob \| history
src/cmd/internal/obj/s390x/vector.go		patch \| blob \| history
test/codegen/floats.go		patch \| blob \| history