[dev.simd] cmd/compile: add round simd ops
author Junyang Shao <shaojunyang@google.com>
Thu, 12 Jun 2025 16:21:35 +0000 (16:21 +0000)
committer Junyang Shao <shaojunyang@google.com>
Fri, 13 Jun 2025 18:43:48 +0000 (11:43 -0700)
This CL is generated by CL 678195.

Change-Id: Ica600229a4e9623fa45f3b5aa370cdd6d9c31686
Reviewed-on: https://go-review.googlesource.com/c/go/+/681295
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/amd64/simdssa.go
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssagen/simdintrinsics.go
src/simd/stubs_amd64.go

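Note on the immediates in the rules below: the rewrites compose the x86 rounding imm8 shared by VROUNDPS/PD, VRNDSCALEPS/PD, and VREDUCEPS/PD. Bits 1:0 select the mode (0 nearest, 1 floor, 2 ceil, 3 trunc), bit 3 suppresses the precision exception (hence the +8 in every SuppressException variant), and for VRNDSCALE/VREDUCE bits 7:4 carry the scale, so the auxInt [a] arriving from the intrinsic is assumed to hold the precision already shifted into the high nibble. A minimal runnable sketch of that composition (helper name hypothetical, not part of this CL):

// Sketch only: mirrors the imm8 layout the rules below rely on.
package main

import "fmt"

const (
	roundNearest = 0 // imm8 bits 1:0 = 00
	roundFloor   = 1 // imm8 bits 1:0 = 01
	roundCeil    = 2 // imm8 bits 1:0 = 10
	roundTrunc   = 3 // imm8 bits 1:0 = 11
	suppressPE   = 8 // imm8 bit 3: suppress the precision exception
)

// rndscaleImm composes a VRNDSCALE/VREDUCE immediate: precision in
// bits 7:4, rounding mode in bits 1:0, optional suppress bit.
func rndscaleImm(precision, mode int, suppress bool) int {
	imm := precision<<4 | mode
	if suppress {
		imm |= suppressPE
	}
	return imm
}

func main() {
	// (CeilSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+10] x)
	// with a = precision<<4: a+10 is precision | suppress | ceil.
	fmt.Println(rndscaleImm(3, roundCeil, true) == 3<<4+10) // true
}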
diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go
index 253bec09ca502422037560c1c2dd5b16b57829e0..f5bc26fe742e8349f31e7bdb2ddba7186b973bf3 100644 (file)
@@ -74,6 +74,10 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPADDD512,
                ssa.OpAMD64VPADDQ512,
                ssa.OpAMD64VPADDB512,
+               ssa.OpAMD64VADDSUBPS128,
+               ssa.OpAMD64VADDSUBPS256,
+               ssa.OpAMD64VADDSUBPD128,
+               ssa.OpAMD64VADDSUBPD256,
                ssa.OpAMD64VANDPS128,
                ssa.OpAMD64VANDPS256,
                ssa.OpAMD64VANDPD128,
@@ -564,6 +568,38 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VSQRTPDMasked512:
                p = simdFp1k1fp1(s, v)
 
+       case ssa.OpAMD64VROUNDPS128,
+               ssa.OpAMD64VROUNDPS256,
+               ssa.OpAMD64VROUNDPD128,
+               ssa.OpAMD64VROUNDPD256,
+               ssa.OpAMD64VRNDSCALEPS512,
+               ssa.OpAMD64VRNDSCALEPS128,
+               ssa.OpAMD64VRNDSCALEPS256,
+               ssa.OpAMD64VRNDSCALEPD128,
+               ssa.OpAMD64VRNDSCALEPD256,
+               ssa.OpAMD64VRNDSCALEPD512,
+               ssa.OpAMD64VREDUCEPS512,
+               ssa.OpAMD64VREDUCEPS128,
+               ssa.OpAMD64VREDUCEPS256,
+               ssa.OpAMD64VREDUCEPD128,
+               ssa.OpAMD64VREDUCEPD256,
+               ssa.OpAMD64VREDUCEPD512:
+               p = simdFp11Imm8(s, v)
+
+       case ssa.OpAMD64VRNDSCALEPSMasked512,
+               ssa.OpAMD64VRNDSCALEPSMasked128,
+               ssa.OpAMD64VRNDSCALEPSMasked256,
+               ssa.OpAMD64VRNDSCALEPDMasked128,
+               ssa.OpAMD64VRNDSCALEPDMasked256,
+               ssa.OpAMD64VRNDSCALEPDMasked512,
+               ssa.OpAMD64VREDUCEPSMasked512,
+               ssa.OpAMD64VREDUCEPSMasked128,
+               ssa.OpAMD64VREDUCEPSMasked256,
+               ssa.OpAMD64VREDUCEPDMasked128,
+               ssa.OpAMD64VREDUCEPDMasked256,
+               ssa.OpAMD64VREDUCEPDMasked512:
+               p = simdFp1k1fp1Imm8(s, v)
+
        case ssa.OpAMD64VCMPPS128,
                ssa.OpAMD64VCMPPS256,
                ssa.OpAMD64VCMPPD128,
@@ -709,6 +745,18 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPAVGBMasked128,
                ssa.OpAMD64VPAVGBMasked256,
                ssa.OpAMD64VPAVGBMasked512,
+               ssa.OpAMD64VRNDSCALEPSMasked512,
+               ssa.OpAMD64VRNDSCALEPSMasked128,
+               ssa.OpAMD64VRNDSCALEPSMasked256,
+               ssa.OpAMD64VRNDSCALEPDMasked128,
+               ssa.OpAMD64VRNDSCALEPDMasked256,
+               ssa.OpAMD64VRNDSCALEPDMasked512,
+               ssa.OpAMD64VREDUCEPSMasked512,
+               ssa.OpAMD64VREDUCEPSMasked128,
+               ssa.OpAMD64VREDUCEPSMasked256,
+               ssa.OpAMD64VREDUCEPDMasked128,
+               ssa.OpAMD64VREDUCEPDMasked256,
+               ssa.OpAMD64VREDUCEPDMasked512,
                ssa.OpAMD64VDIVPSMasked512,
                ssa.OpAMD64VDIVPSMasked128,
                ssa.OpAMD64VDIVPSMasked256,
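For orientation: the case arms added above route ops whose immediate travels in AuxInt to dedicated prog builders. A hypothetical sketch of the simdFp11Imm8 shape (one vector register in, one out, plus the imm8), assuming the usual ssagen plumbing; the real helper is defined elsewhere in this file and may differ:

// Hypothetical sketch only: emits `OP $imm8, src, dst` for a one-input
// SIMD op that carries its rounding immediate in v.AuxInt.
package amd64

import (
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/ssagen"
	"cmd/internal/obj"
)

func simdFp11Imm8Sketch(s *ssagen.State, v *ssa.Value) *obj.Prog {
	p := s.Prog(v.Op.Asm())
	p.From.Type = obj.TYPE_CONST
	p.From.Offset = v.AuxInt            // imm8 composed by the rewrite rules
	p.AddRestSourceReg(v.Args[0].Reg()) // source vector register (assumed obj API)
	p.To.Type = obj.TYPE_REG
	p.To.Reg = v.Reg()                  // destination vector register
	return p
}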
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index a9daf27548449171485679519dfa9b330f0f8d68..8bf896afb26a3cd9879bdd5395af90e02f5ca6c6 100644 (file)
 (AddUint8x16 ...) => (VPADDB128 ...)
 (AddUint8x32 ...) => (VPADDB256 ...)
 (AddUint8x64 ...) => (VPADDB512 ...)
+(AddSubFloat32x4 ...) => (VADDSUBPS128 ...)
+(AddSubFloat32x8 ...) => (VADDSUBPS256 ...)
+(AddSubFloat64x2 ...) => (VADDSUBPD128 ...)
+(AddSubFloat64x4 ...) => (VADDSUBPD256 ...)
 (AndFloat32x16 ...) => (VANDPS512 ...)
 (AndFloat32x4 ...) => (VANDPS128 ...)
 (AndFloat32x8 ...) => (VANDPS256 ...)
 (AverageUint8x16 ...) => (VPAVGB128 ...)
 (AverageUint8x32 ...) => (VPAVGB256 ...)
 (AverageUint8x64 ...) => (VPAVGB512 ...)
+(CeilFloat32x4 x) => (VROUNDPS128 [2] x)
+(CeilFloat32x8 x) => (VROUNDPS256 [2] x)
+(CeilFloat64x2 x) => (VROUNDPD128 [2] x)
+(CeilFloat64x4 x) => (VROUNDPD256 [2] x)
+(CeilSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+10] x)
+(CeilSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+10] x)
+(CeilSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+10] x)
+(CeilSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+10] x)
+(CeilSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+10] x)
+(CeilSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+10] x)
+(CeilWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+2] x)
+(CeilWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+2] x)
+(CeilWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+2] x)
+(CeilWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+2] x)
+(CeilWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+2] x)
+(CeilWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+2] x)
+(DiffWithCeilSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+10] x)
+(DiffWithCeilSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+10] x)
+(DiffWithCeilSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+10] x)
+(DiffWithCeilSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+10] x)
+(DiffWithCeilSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+10] x)
+(DiffWithCeilSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+10] x)
+(DiffWithCeilWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+2] x)
+(DiffWithCeilWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+2] x)
+(DiffWithCeilWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+2] x)
+(DiffWithCeilWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+2] x)
+(DiffWithCeilWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+2] x)
+(DiffWithCeilWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+2] x)
+(DiffWithFloorSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+9] x)
+(DiffWithFloorSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+9] x)
+(DiffWithFloorSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+9] x)
+(DiffWithFloorSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+9] x)
+(DiffWithFloorSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+9] x)
+(DiffWithFloorSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+9] x)
+(DiffWithFloorWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+1] x)
+(DiffWithFloorWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+1] x)
+(DiffWithFloorWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+1] x)
+(DiffWithFloorWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+1] x)
+(DiffWithFloorWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+1] x)
+(DiffWithFloorWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+1] x)
+(DiffWithRoundSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+8] x)
+(DiffWithRoundSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+8] x)
+(DiffWithRoundSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+8] x)
+(DiffWithRoundSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+8] x)
+(DiffWithRoundSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+8] x)
+(DiffWithRoundSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+8] x)
+(DiffWithRoundWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+0] x)
+(DiffWithRoundWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+0] x)
+(DiffWithRoundWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+0] x)
+(DiffWithRoundWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+0] x)
+(DiffWithRoundWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+0] x)
+(DiffWithRoundWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+0] x)
+(DiffWithTruncSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+11] x)
+(DiffWithTruncSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+11] x)
+(DiffWithTruncSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+11] x)
+(DiffWithTruncSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+11] x)
+(DiffWithTruncSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+11] x)
+(DiffWithTruncSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+11] x)
+(DiffWithTruncWithPrecisionFloat32x16 [a] x) => (VREDUCEPS512 [a+3] x)
+(DiffWithTruncWithPrecisionFloat32x4 [a] x) => (VREDUCEPS128 [a+3] x)
+(DiffWithTruncWithPrecisionFloat32x8 [a] x) => (VREDUCEPS256 [a+3] x)
+(DiffWithTruncWithPrecisionFloat64x2 [a] x) => (VREDUCEPD128 [a+3] x)
+(DiffWithTruncWithPrecisionFloat64x4 [a] x) => (VREDUCEPD256 [a+3] x)
+(DiffWithTruncWithPrecisionFloat64x8 [a] x) => (VREDUCEPD512 [a+3] x)
 (DivFloat32x16 ...) => (VDIVPS512 ...)
 (DivFloat32x4 ...) => (VDIVPS128 ...)
 (DivFloat32x8 ...) => (VDIVPS256 ...)
 (EqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [0] x y))
 (EqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [0] x y))
 (EqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [0] x y))
+(FloorFloat32x4 x) => (VROUNDPS128 [1] x)
+(FloorFloat32x8 x) => (VROUNDPS256 [1] x)
+(FloorFloat64x2 x) => (VROUNDPD128 [1] x)
+(FloorFloat64x4 x) => (VROUNDPD256 [1] x)
+(FloorSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+9] x)
+(FloorSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+9] x)
+(FloorSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+9] x)
+(FloorSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+9] x)
+(FloorSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+9] x)
+(FloorSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+9] x)
+(FloorWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+1] x)
+(FloorWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+1] x)
+(FloorWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+1] x)
+(FloorWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+1] x)
+(FloorWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+1] x)
+(FloorWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+1] x)
 (GreaterFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [6] x y))
 (GreaterFloat32x4 x y) => (VCMPPS128 [6] x y)
 (GreaterFloat32x8 x y) => (VCMPPS256 [6] x y)
 (MaskedAverageUint8x16 x y mask) => (VPAVGBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
 (MaskedAverageUint8x32 x y mask) => (VPAVGBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
 (MaskedAverageUint8x64 x y mask) => (VPAVGBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+(MaskedCeilSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+10] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedCeilSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+10] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedCeilSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+10] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedCeilSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+10] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MaskedCeilSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+10] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MaskedCeilSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+10] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MaskedCeilWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedCeilWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedCeilWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedCeilWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MaskedCeilWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MaskedCeilWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+10] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+10] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+10] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+10] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+10] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+10] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MaskedDiffWithCeilWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedDiffWithCeilWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedDiffWithCeilWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedDiffWithCeilWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MaskedDiffWithCeilWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MaskedDiffWithCeilWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+9] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+9] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+9] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+9] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+9] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+9] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MaskedDiffWithFloorWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedDiffWithFloorWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedDiffWithFloorWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedDiffWithFloorWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MaskedDiffWithFloorWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MaskedDiffWithFloorWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+8] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+8] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+8] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+8] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+8] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+8] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MaskedDiffWithRoundWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedDiffWithRoundWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedDiffWithRoundWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedDiffWithRoundWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MaskedDiffWithRoundWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MaskedDiffWithRoundWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+11] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+11] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+11] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+11] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+11] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+11] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MaskedDiffWithTruncWithPrecisionFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedDiffWithTruncWithPrecisionFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedDiffWithTruncWithPrecisionFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedDiffWithTruncWithPrecisionFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MaskedDiffWithTruncWithPrecisionFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MaskedDiffWithTruncWithPrecisionFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (MaskedDivFloat32x16 x y mask) => (VDIVPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
 (MaskedDivFloat32x4 x y mask) => (VDIVPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
 (MaskedDivFloat32x8 x y mask) => (VDIVPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
 (MaskedEqualUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [0] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
 (MaskedEqualUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [0] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
 (MaskedEqualUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [0] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
+(MaskedFloorSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+9] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedFloorSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+9] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedFloorSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+9] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedFloorSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+9] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MaskedFloorSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+9] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MaskedFloorSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+9] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MaskedFloorWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedFloorWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedFloorWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedFloorWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MaskedFloorWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MaskedFloorWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (MaskedGreaterFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [6] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
 (MaskedGreaterFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [6] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
 (MaskedGreaterFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [6] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
 (MaskedPopCountUint8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
 (MaskedPopCountUint8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
 (MaskedPopCountUint8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+(MaskedRoundSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+8] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedRoundSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+8] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedRoundSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+8] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedRoundSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+8] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MaskedRoundSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+8] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MaskedRoundSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+8] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MaskedRoundWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedRoundWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedRoundWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedRoundWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MaskedRoundWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MaskedRoundWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (MaskedSaturatedAddInt16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
 (MaskedSaturatedAddInt16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
 (MaskedSaturatedAddInt16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
 (MaskedSubUint8x16 x y mask) => (VPSUBBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
 (MaskedSubUint8x32 x y mask) => (VPSUBBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
 (MaskedSubUint8x64 x y mask) => (VPSUBBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+(MaskedTruncSuppressExceptionWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+11] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedTruncSuppressExceptionWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+11] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedTruncSuppressExceptionWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+11] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedTruncSuppressExceptionWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+11] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MaskedTruncSuppressExceptionWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+11] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MaskedTruncSuppressExceptionWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+11] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(MaskedTruncWithPrecisionFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+(MaskedTruncWithPrecisionFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+(MaskedTruncWithPrecisionFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+(MaskedTruncWithPrecisionFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+(MaskedTruncWithPrecisionFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+(MaskedTruncWithPrecisionFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (MaskedXorFloat32x16 x y mask) => (VXORPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
 (MaskedXorFloat32x4 x y mask) => (VXORPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
 (MaskedXorFloat32x8 x y mask) => (VXORPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
 (PopCountUint8x16 ...) => (VPOPCNTB128 ...)
 (PopCountUint8x32 ...) => (VPOPCNTB256 ...)
 (PopCountUint8x64 ...) => (VPOPCNTB512 ...)
+(RoundFloat32x4 x) => (VROUNDPS128 [0] x)
+(RoundFloat32x8 x) => (VROUNDPS256 [0] x)
+(RoundFloat64x2 x) => (VROUNDPD128 [0] x)
+(RoundFloat64x4 x) => (VROUNDPD256 [0] x)
+(RoundSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+8] x)
+(RoundSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+8] x)
+(RoundSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+8] x)
+(RoundSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+8] x)
+(RoundSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+8] x)
+(RoundSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+8] x)
+(RoundWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+0] x)
+(RoundWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+0] x)
+(RoundWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+0] x)
+(RoundWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+0] x)
+(RoundWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+0] x)
+(RoundWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+0] x)
 (SaturatedAddInt16x16 ...) => (VPADDSW256 ...)
 (SaturatedAddInt16x32 ...) => (VPADDSW512 ...)
 (SaturatedAddInt16x8 ...) => (VPADDSW128 ...)
 (SubUint8x16 ...) => (VPSUBB128 ...)
 (SubUint8x32 ...) => (VPSUBB256 ...)
 (SubUint8x64 ...) => (VPSUBB512 ...)
+(TruncFloat32x4 x) => (VROUNDPS128 [3] x)
+(TruncFloat32x8 x) => (VROUNDPS256 [3] x)
+(TruncFloat64x2 x) => (VROUNDPD128 [3] x)
+(TruncFloat64x4 x) => (VROUNDPD256 [3] x)
+(TruncSuppressExceptionWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+11] x)
+(TruncSuppressExceptionWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+11] x)
+(TruncSuppressExceptionWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+11] x)
+(TruncSuppressExceptionWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+11] x)
+(TruncSuppressExceptionWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+11] x)
+(TruncSuppressExceptionWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+11] x)
+(TruncWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+3] x)
+(TruncWithPrecisionFloat32x4 [a] x) => (VRNDSCALEPS128 [a+3] x)
+(TruncWithPrecisionFloat32x8 [a] x) => (VRNDSCALEPS256 [a+3] x)
+(TruncWithPrecisionFloat64x2 [a] x) => (VRNDSCALEPD128 [a+3] x)
+(TruncWithPrecisionFloat64x4 [a] x) => (VRNDSCALEPD256 [a+3] x)
+(TruncWithPrecisionFloat64x8 [a] x) => (VRNDSCALEPD512 [a+3] x)
 (XorFloat32x16 ...) => (VXORPS512 ...)
 (XorFloat32x4 ...) => (VXORPS128 ...)
 (XorFloat32x8 ...) => (VXORPS256 ...)
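A worked instance of the immediate arithmetic, using one rule from this file (precision value assumed for illustration):

// Worked check for (CeilWithPrecisionFloat32x16 [a] x) => (VRNDSCALEPS512 [a+2] x):
// with precision 3 the intrinsic passes a = 3<<4 = 48, so a+2 = 50 =
// 0b00110010: scale 3 in bits 7:4, ceil (binary 10) in bits 1:0.
package main

import "fmt"

func main() {
	a := 3 << 4
	fmt.Printf("%#b\n", a+2) // 0b110010
}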
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
index b9709ca819210fdb1233b1c6bf7af825449240ab..6881757d1a1babf361566b0eec81766ed0c336d9 100644 (file)
@@ -30,6 +30,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
                {name: "VSQRTPS512", argLength: 1, reg: fp11, asm: "VSQRTPS", commutative: false, typ: "Vec512", resultInArg0: false},
                {name: "VXORPS512", argLength: 2, reg: fp21, asm: "VXORPS", commutative: true, typ: "Vec512", resultInArg0: false},
                {name: "VADDPS128", argLength: 2, reg: fp21, asm: "VADDPS", commutative: true, typ: "Vec128", resultInArg0: false},
+               {name: "VADDSUBPS128", argLength: 2, reg: fp21, asm: "VADDSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VANDPS128", argLength: 2, reg: fp21, asm: "VANDPS", commutative: true, typ: "Vec128", resultInArg0: false},
                {name: "VANDNPS128", argLength: 2, reg: fp21, asm: "VANDNPS", commutative: true, typ: "Vec128", resultInArg0: false},
                {name: "VRCP14PS128", argLength: 1, reg: fp11, asm: "VRCP14PS", commutative: false, typ: "Vec128", resultInArg0: false},
@@ -58,6 +59,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
                {name: "VSQRTPS128", argLength: 1, reg: fp11, asm: "VSQRTPS", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VXORPS128", argLength: 2, reg: fp21, asm: "VXORPS", commutative: true, typ: "Vec128", resultInArg0: false},
                {name: "VADDPS256", argLength: 2, reg: fp21, asm: "VADDPS", commutative: true, typ: "Vec256", resultInArg0: false},
+               {name: "VADDSUBPS256", argLength: 2, reg: fp21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VANDPS256", argLength: 2, reg: fp21, asm: "VANDPS", commutative: true, typ: "Vec256", resultInArg0: false},
                {name: "VANDNPS256", argLength: 2, reg: fp21, asm: "VANDNPS", commutative: true, typ: "Vec256", resultInArg0: false},
                {name: "VRCP14PS256", argLength: 1, reg: fp11, asm: "VRCP14PS", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -86,6 +88,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
                {name: "VSQRTPS256", argLength: 1, reg: fp11, asm: "VSQRTPS", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VXORPS256", argLength: 2, reg: fp21, asm: "VXORPS", commutative: true, typ: "Vec256", resultInArg0: false},
                {name: "VADDPD128", argLength: 2, reg: fp21, asm: "VADDPD", commutative: true, typ: "Vec128", resultInArg0: false},
+               {name: "VADDSUBPD128", argLength: 2, reg: fp21, asm: "VADDSUBPD", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VANDPD128", argLength: 2, reg: fp21, asm: "VANDPD", commutative: true, typ: "Vec128", resultInArg0: false},
                {name: "VANDNPD128", argLength: 2, reg: fp21, asm: "VANDNPD", commutative: true, typ: "Vec128", resultInArg0: false},
                {name: "VRCP14PD128", argLength: 1, reg: fp11, asm: "VRCP14PD", commutative: false, typ: "Vec128", resultInArg0: false},
@@ -114,6 +117,7 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
                {name: "VSQRTPD128", argLength: 1, reg: fp11, asm: "VSQRTPD", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VXORPD128", argLength: 2, reg: fp21, asm: "VXORPD", commutative: true, typ: "Vec128", resultInArg0: false},
                {name: "VADDPD256", argLength: 2, reg: fp21, asm: "VADDPD", commutative: true, typ: "Vec256", resultInArg0: false},
+               {name: "VADDSUBPD256", argLength: 2, reg: fp21, asm: "VADDSUBPD", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VANDPD256", argLength: 2, reg: fp21, asm: "VANDPD", commutative: true, typ: "Vec256", resultInArg0: false},
                {name: "VANDNPD256", argLength: 2, reg: fp21, asm: "VANDNPD", commutative: true, typ: "Vec256", resultInArg0: false},
                {name: "VRCP14PD256", argLength: 1, reg: fp11, asm: "VRCP14PD", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -543,17 +547,45 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
                {name: "VPMINUBMasked512", argLength: 3, reg: fp2k1fp1, asm: "VPMINUB", commutative: true, typ: "Vec512", resultInArg0: false},
                {name: "VPMAXUB512", argLength: 2, reg: fp21, asm: "VPMAXUB", commutative: true, typ: "Vec512", resultInArg0: false},
                {name: "VPMINUB512", argLength: 2, reg: fp21, asm: "VPMINUB", commutative: true, typ: "Vec512", resultInArg0: false},
+               {name: "VRNDSCALEPS512", argLength: 1, reg: fp11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VREDUCEPS512", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
                {name: "VCMPPS512", argLength: 2, reg: fp2k1, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VRNDSCALEPSMasked512", argLength: 2, reg: fp1k1fp1, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VREDUCEPSMasked512", argLength: 2, reg: fp1k1fp1, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
                {name: "VCMPPSMasked512", argLength: 3, reg: fp2k1k1, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VROUNDPS128", argLength: 1, reg: fp11, asm: "VROUNDPS", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VRNDSCALEPS128", argLength: 1, reg: fp11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VREDUCEPS128", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VCMPPS128", argLength: 2, reg: fp21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false},
+               {name: "VRNDSCALEPSMasked128", argLength: 2, reg: fp1k1fp1, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VREDUCEPSMasked128", argLength: 2, reg: fp1k1fp1, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VCMPPSMasked128", argLength: 3, reg: fp2k1k1, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VROUNDPS256", argLength: 1, reg: fp11, asm: "VROUNDPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VRNDSCALEPS256", argLength: 1, reg: fp11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VREDUCEPS256", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VCMPPS256", argLength: 2, reg: fp21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec256", resultInArg0: false},
+               {name: "VRNDSCALEPSMasked256", argLength: 2, reg: fp1k1fp1, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VREDUCEPSMasked256", argLength: 2, reg: fp1k1fp1, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VCMPPSMasked256", argLength: 3, reg: fp2k1k1, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VROUNDPD128", argLength: 1, reg: fp11, asm: "VROUNDPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VRNDSCALEPD128", argLength: 1, reg: fp11, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VREDUCEPD128", argLength: 1, reg: fp11, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VCMPPD128", argLength: 2, reg: fp21, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Vec128", resultInArg0: false},
+               {name: "VRNDSCALEPDMasked128", argLength: 2, reg: fp1k1fp1, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VREDUCEPDMasked128", argLength: 2, reg: fp1k1fp1, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VCMPPDMasked128", argLength: 3, reg: fp2k1k1, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VROUNDPD256", argLength: 1, reg: fp11, asm: "VROUNDPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VRNDSCALEPD256", argLength: 1, reg: fp11, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VREDUCEPD256", argLength: 1, reg: fp11, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VCMPPD256", argLength: 2, reg: fp21, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Vec256", resultInArg0: false},
+               {name: "VRNDSCALEPDMasked256", argLength: 2, reg: fp1k1fp1, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VREDUCEPDMasked256", argLength: 2, reg: fp1k1fp1, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VCMPPDMasked256", argLength: 3, reg: fp2k1k1, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VRNDSCALEPD512", argLength: 1, reg: fp11, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VREDUCEPD512", argLength: 1, reg: fp11, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
                {name: "VCMPPD512", argLength: 2, reg: fp2k1, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VRNDSCALEPDMasked512", argLength: 2, reg: fp1k1fp1, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VREDUCEPDMasked512", argLength: 2, reg: fp1k1fp1, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
                {name: "VCMPPDMasked512", argLength: 3, reg: fp2k1k1, asm: "VCMPPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPW256", argLength: 2, reg: fp2k1, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPWMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
index 529ec09de92d5c9fc975832f9f5b19e3d9ab03b1..25a496c52f0dea2499a668027ead48e3e32b255b 100644 (file)
@@ -46,12 +46,15 @@ func simdGenericOps() []opData {
                {name: "SubFloat32x16", argLength: 2, commutative: false},
                {name: "XorFloat32x16", argLength: 2, commutative: true},
                {name: "AddFloat32x4", argLength: 2, commutative: true},
+               {name: "AddSubFloat32x4", argLength: 2, commutative: false},
                {name: "AndFloat32x4", argLength: 2, commutative: true},
                {name: "AndNotFloat32x4", argLength: 2, commutative: true},
                {name: "ApproximateReciprocalFloat32x4", argLength: 1, commutative: false},
                {name: "ApproximateReciprocalOfSqrtFloat32x4", argLength: 1, commutative: false},
+               {name: "CeilFloat32x4", argLength: 1, commutative: false},
                {name: "DivFloat32x4", argLength: 2, commutative: false},
                {name: "EqualFloat32x4", argLength: 2, commutative: true},
+               {name: "FloorFloat32x4", argLength: 1, commutative: false},
                {name: "GreaterFloat32x4", argLength: 2, commutative: false},
                {name: "GreaterEqualFloat32x4", argLength: 2, commutative: false},
                {name: "IsNanFloat32x4", argLength: 2, commutative: true},
@@ -86,16 +89,21 @@ func simdGenericOps() []opData {
                {name: "OrFloat32x4", argLength: 2, commutative: true},
                {name: "PairwiseAddFloat32x4", argLength: 2, commutative: false},
                {name: "PairwiseSubFloat32x4", argLength: 2, commutative: false},
+               {name: "RoundFloat32x4", argLength: 1, commutative: false},
                {name: "SqrtFloat32x4", argLength: 1, commutative: false},
                {name: "SubFloat32x4", argLength: 2, commutative: false},
+               {name: "TruncFloat32x4", argLength: 1, commutative: false},
                {name: "XorFloat32x4", argLength: 2, commutative: true},
                {name: "AddFloat32x8", argLength: 2, commutative: true},
+               {name: "AddSubFloat32x8", argLength: 2, commutative: false},
                {name: "AndFloat32x8", argLength: 2, commutative: true},
                {name: "AndNotFloat32x8", argLength: 2, commutative: true},
                {name: "ApproximateReciprocalFloat32x8", argLength: 1, commutative: false},
                {name: "ApproximateReciprocalOfSqrtFloat32x8", argLength: 1, commutative: false},
+               {name: "CeilFloat32x8", argLength: 1, commutative: false},
                {name: "DivFloat32x8", argLength: 2, commutative: false},
                {name: "EqualFloat32x8", argLength: 2, commutative: true},
+               {name: "FloorFloat32x8", argLength: 1, commutative: false},
                {name: "GreaterFloat32x8", argLength: 2, commutative: false},
                {name: "GreaterEqualFloat32x8", argLength: 2, commutative: false},
                {name: "IsNanFloat32x8", argLength: 2, commutative: true},
@@ -130,16 +138,21 @@ func simdGenericOps() []opData {
                {name: "OrFloat32x8", argLength: 2, commutative: true},
                {name: "PairwiseAddFloat32x8", argLength: 2, commutative: false},
                {name: "PairwiseSubFloat32x8", argLength: 2, commutative: false},
+               {name: "RoundFloat32x8", argLength: 1, commutative: false},
                {name: "SqrtFloat32x8", argLength: 1, commutative: false},
                {name: "SubFloat32x8", argLength: 2, commutative: false},
+               {name: "TruncFloat32x8", argLength: 1, commutative: false},
                {name: "XorFloat32x8", argLength: 2, commutative: true},
                {name: "AddFloat64x2", argLength: 2, commutative: true},
+               {name: "AddSubFloat64x2", argLength: 2, commutative: false},
                {name: "AndFloat64x2", argLength: 2, commutative: true},
                {name: "AndNotFloat64x2", argLength: 2, commutative: true},
                {name: "ApproximateReciprocalFloat64x2", argLength: 1, commutative: false},
                {name: "ApproximateReciprocalOfSqrtFloat64x2", argLength: 1, commutative: false},
+               {name: "CeilFloat64x2", argLength: 1, commutative: false},
                {name: "DivFloat64x2", argLength: 2, commutative: false},
                {name: "EqualFloat64x2", argLength: 2, commutative: true},
+               {name: "FloorFloat64x2", argLength: 1, commutative: false},
                {name: "GreaterFloat64x2", argLength: 2, commutative: false},
                {name: "GreaterEqualFloat64x2", argLength: 2, commutative: false},
                {name: "IsNanFloat64x2", argLength: 2, commutative: true},
@@ -174,16 +187,21 @@ func simdGenericOps() []opData {
                {name: "OrFloat64x2", argLength: 2, commutative: true},
                {name: "PairwiseAddFloat64x2", argLength: 2, commutative: false},
                {name: "PairwiseSubFloat64x2", argLength: 2, commutative: false},
+               {name: "RoundFloat64x2", argLength: 1, commutative: false},
                {name: "SqrtFloat64x2", argLength: 1, commutative: false},
                {name: "SubFloat64x2", argLength: 2, commutative: false},
+               {name: "TruncFloat64x2", argLength: 1, commutative: false},
                {name: "XorFloat64x2", argLength: 2, commutative: true},
                {name: "AddFloat64x4", argLength: 2, commutative: true},
+               {name: "AddSubFloat64x4", argLength: 2, commutative: false},
                {name: "AndFloat64x4", argLength: 2, commutative: true},
                {name: "AndNotFloat64x4", argLength: 2, commutative: true},
                {name: "ApproximateReciprocalFloat64x4", argLength: 1, commutative: false},
                {name: "ApproximateReciprocalOfSqrtFloat64x4", argLength: 1, commutative: false},
+               {name: "CeilFloat64x4", argLength: 1, commutative: false},
                {name: "DivFloat64x4", argLength: 2, commutative: false},
                {name: "EqualFloat64x4", argLength: 2, commutative: true},
+               {name: "FloorFloat64x4", argLength: 1, commutative: false},
                {name: "GreaterFloat64x4", argLength: 2, commutative: false},
                {name: "GreaterEqualFloat64x4", argLength: 2, commutative: false},
                {name: "IsNanFloat64x4", argLength: 2, commutative: true},
@@ -218,8 +236,10 @@ func simdGenericOps() []opData {
                {name: "OrFloat64x4", argLength: 2, commutative: true},
                {name: "PairwiseAddFloat64x4", argLength: 2, commutative: false},
                {name: "PairwiseSubFloat64x4", argLength: 2, commutative: false},
+               {name: "RoundFloat64x4", argLength: 1, commutative: false},
                {name: "SqrtFloat64x4", argLength: 1, commutative: false},
                {name: "SubFloat64x4", argLength: 2, commutative: false},
+               {name: "TruncFloat64x4", argLength: 1, commutative: false},
                {name: "XorFloat64x4", argLength: 2, commutative: true},
                {name: "AddFloat64x8", argLength: 2, commutative: true},
                {name: "AndFloat64x8", argLength: 2, commutative: true},
@@ -1075,5 +1095,197 @@ func simdGenericOps() []opData {
                {name: "SaturatedAddUint8x64", argLength: 2, commutative: true},
                {name: "SaturatedSubUint8x64", argLength: 2, commutative: false},
                {name: "SubUint8x64", argLength: 2, commutative: false},
+               {name: "CeilSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "CeilWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithCeilSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithCeilWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithFloorSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithFloorWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithRoundSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithRoundWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithTruncSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithTruncWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "FloorSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "FloorWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "MaskedCeilSuppressExceptionWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedCeilWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithCeilWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithFloorWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithRoundWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithTruncWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedFloorSuppressExceptionWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedFloorWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedRoundSuppressExceptionWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedRoundWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedTruncSuppressExceptionWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedTruncWithPrecisionFloat32x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "RoundSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "RoundWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "TruncSuppressExceptionWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "TruncWithPrecisionFloat32x16", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "CeilSuppressExceptionWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "CeilWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithCeilSuppressExceptionWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithCeilWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithFloorSuppressExceptionWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithFloorWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithRoundSuppressExceptionWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithRoundWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithTruncSuppressExceptionWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithTruncWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "FloorSuppressExceptionWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "FloorWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "MaskedCeilSuppressExceptionWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedCeilWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithCeilWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithFloorWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithRoundWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithTruncWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedFloorSuppressExceptionWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedFloorWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedRoundSuppressExceptionWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedRoundWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedTruncSuppressExceptionWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedTruncWithPrecisionFloat32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "RoundSuppressExceptionWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "RoundWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "TruncSuppressExceptionWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "TruncWithPrecisionFloat32x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "CeilSuppressExceptionWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "CeilWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithCeilSuppressExceptionWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithCeilWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithFloorSuppressExceptionWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithFloorWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithRoundSuppressExceptionWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithRoundWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithTruncSuppressExceptionWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithTruncWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "FloorSuppressExceptionWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "FloorWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "MaskedCeilSuppressExceptionWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedCeilWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithCeilWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithFloorWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithRoundWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithTruncWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedFloorSuppressExceptionWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedFloorWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedRoundSuppressExceptionWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedRoundWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedTruncSuppressExceptionWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedTruncWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "RoundSuppressExceptionWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "RoundWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "TruncSuppressExceptionWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "TruncWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "CeilSuppressExceptionWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "CeilWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithCeilSuppressExceptionWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithCeilWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithFloorSuppressExceptionWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithFloorWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithRoundSuppressExceptionWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithRoundWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithTruncSuppressExceptionWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithTruncWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "FloorSuppressExceptionWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "FloorWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "MaskedCeilSuppressExceptionWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedCeilWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithCeilWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithFloorWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithRoundWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithTruncWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedFloorSuppressExceptionWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedFloorWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedRoundSuppressExceptionWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedRoundWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedTruncSuppressExceptionWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedTruncWithPrecisionFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "RoundSuppressExceptionWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "RoundWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "TruncSuppressExceptionWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "TruncWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "CeilSuppressExceptionWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "CeilWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithCeilSuppressExceptionWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithCeilWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithFloorSuppressExceptionWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithFloorWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithRoundSuppressExceptionWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithRoundWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithTruncSuppressExceptionWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithTruncWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "FloorSuppressExceptionWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "FloorWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "MaskedCeilSuppressExceptionWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedCeilWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithCeilWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithFloorWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithRoundWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithTruncWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedFloorSuppressExceptionWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedFloorWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedRoundSuppressExceptionWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedRoundWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedTruncSuppressExceptionWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedTruncWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "RoundSuppressExceptionWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "RoundWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "TruncSuppressExceptionWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "TruncWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "CeilSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "CeilWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithCeilSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithCeilWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithFloorSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithFloorWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithRoundSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithRoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithTruncSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "DiffWithTruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "FloorSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "FloorWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "MaskedCeilSuppressExceptionWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedCeilWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithCeilWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithFloorWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithRoundWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedDiffWithTruncWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedFloorSuppressExceptionWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedFloorWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedRoundSuppressExceptionWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedRoundWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedTruncSuppressExceptionWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "MaskedTruncWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "RoundSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "TruncSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
        }
 }
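
[Editor's note] The hunk above closes the new generic op declarations in _gen/simdgenericOps.go. Every entry follows one pattern per vector shape: the Ceil/Floor/Round/Trunc "WithPrecision" ops and the "DiffWith" ops take a single vector argument, their Masked variants take the vector plus a mask (argLength: 2), and the rounding immediate travels in aux: "Int8". A minimal generator sketch in that spirit (this is not the actual generator from CL 678195; names and output format are only mirrored from the entries above):

package main

import (
	"fmt"
	"sort"
	"strings"
)

func main() {
	bases := []string{"Ceil", "Floor", "Round", "Trunc",
		"DiffWithCeil", "DiffWithFloor", "DiffWithRound", "DiffWithTrunc"}
	shapes := []string{"Float32x16", "Float32x4", "Float32x8",
		"Float64x2", "Float64x4", "Float64x8"}
	for _, shape := range shapes {
		var names []string
		for _, base := range bases {
			for _, se := range []string{"SuppressException", ""} {
				op := base + se + "WithPrecision" + shape
				names = append(names, op, "Masked"+op)
			}
		}
		// The generated file lists ops alphabetically within a shape.
		sort.Strings(names)
		for _, n := range names {
			argLen := 1 // unmasked: one vector input
			if strings.HasPrefix(n, "Masked") {
				argLen = 2 // masked: vector + mask
			}
			fmt.Printf("{name: %q, argLength: %d, commutative: false, aux: \"Int8\"},\n", n, argLen)
		}
	}
}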
index c7abca814e9a004f722cf46b3ffdcd494f8fd3e6..090cf6903218f996895168b10971770c0ec6cf06 100644 (file)
@@ -1223,6 +1223,7 @@ const (
        OpAMD64VSQRTPS512
        OpAMD64VXORPS512
        OpAMD64VADDPS128
+       OpAMD64VADDSUBPS128
        OpAMD64VANDPS128
        OpAMD64VANDNPS128
        OpAMD64VRCP14PS128
@@ -1251,6 +1252,7 @@ const (
        OpAMD64VSQRTPS128
        OpAMD64VXORPS128
        OpAMD64VADDPS256
+       OpAMD64VADDSUBPS256
        OpAMD64VANDPS256
        OpAMD64VANDNPS256
        OpAMD64VRCP14PS256
@@ -1279,6 +1281,7 @@ const (
        OpAMD64VSQRTPS256
        OpAMD64VXORPS256
        OpAMD64VADDPD128
+       OpAMD64VADDSUBPD128
        OpAMD64VANDPD128
        OpAMD64VANDNPD128
        OpAMD64VRCP14PD128
@@ -1307,6 +1310,7 @@ const (
        OpAMD64VSQRTPD128
        OpAMD64VXORPD128
        OpAMD64VADDPD256
+       OpAMD64VADDSUBPD256
        OpAMD64VANDPD256
        OpAMD64VANDNPD256
        OpAMD64VRCP14PD256
@@ -1736,17 +1740,45 @@ const (
        OpAMD64VPMINUBMasked512
        OpAMD64VPMAXUB512
        OpAMD64VPMINUB512
+       OpAMD64VRNDSCALEPS512
+       OpAMD64VREDUCEPS512
        OpAMD64VCMPPS512
+       OpAMD64VRNDSCALEPSMasked512
+       OpAMD64VREDUCEPSMasked512
        OpAMD64VCMPPSMasked512
+       OpAMD64VROUNDPS128
+       OpAMD64VRNDSCALEPS128
+       OpAMD64VREDUCEPS128
        OpAMD64VCMPPS128
+       OpAMD64VRNDSCALEPSMasked128
+       OpAMD64VREDUCEPSMasked128
        OpAMD64VCMPPSMasked128
+       OpAMD64VROUNDPS256
+       OpAMD64VRNDSCALEPS256
+       OpAMD64VREDUCEPS256
        OpAMD64VCMPPS256
+       OpAMD64VRNDSCALEPSMasked256
+       OpAMD64VREDUCEPSMasked256
        OpAMD64VCMPPSMasked256
+       OpAMD64VROUNDPD128
+       OpAMD64VRNDSCALEPD128
+       OpAMD64VREDUCEPD128
        OpAMD64VCMPPD128
+       OpAMD64VRNDSCALEPDMasked128
+       OpAMD64VREDUCEPDMasked128
        OpAMD64VCMPPDMasked128
+       OpAMD64VROUNDPD256
+       OpAMD64VRNDSCALEPD256
+       OpAMD64VREDUCEPD256
        OpAMD64VCMPPD256
+       OpAMD64VRNDSCALEPDMasked256
+       OpAMD64VREDUCEPDMasked256
        OpAMD64VCMPPDMasked256
+       OpAMD64VRNDSCALEPD512
+       OpAMD64VREDUCEPD512
        OpAMD64VCMPPD512
+       OpAMD64VRNDSCALEPDMasked512
+       OpAMD64VREDUCEPDMasked512
        OpAMD64VCMPPDMasked512
        OpAMD64VPCMPW256
        OpAMD64VPCMPWMasked256
@@ -4065,12 +4097,15 @@ const (
        OpSubFloat32x16
        OpXorFloat32x16
        OpAddFloat32x4
+       OpAddSubFloat32x4
        OpAndFloat32x4
        OpAndNotFloat32x4
        OpApproximateReciprocalFloat32x4
        OpApproximateReciprocalOfSqrtFloat32x4
+       OpCeilFloat32x4
        OpDivFloat32x4
        OpEqualFloat32x4
+       OpFloorFloat32x4
        OpGreaterFloat32x4
        OpGreaterEqualFloat32x4
        OpIsNanFloat32x4
@@ -4105,16 +4140,21 @@ const (
        OpOrFloat32x4
        OpPairwiseAddFloat32x4
        OpPairwiseSubFloat32x4
+       OpRoundFloat32x4
        OpSqrtFloat32x4
        OpSubFloat32x4
+       OpTruncFloat32x4
        OpXorFloat32x4
        OpAddFloat32x8
+       OpAddSubFloat32x8
        OpAndFloat32x8
        OpAndNotFloat32x8
        OpApproximateReciprocalFloat32x8
        OpApproximateReciprocalOfSqrtFloat32x8
+       OpCeilFloat32x8
        OpDivFloat32x8
        OpEqualFloat32x8
+       OpFloorFloat32x8
        OpGreaterFloat32x8
        OpGreaterEqualFloat32x8
        OpIsNanFloat32x8
@@ -4149,16 +4189,21 @@ const (
        OpOrFloat32x8
        OpPairwiseAddFloat32x8
        OpPairwiseSubFloat32x8
+       OpRoundFloat32x8
        OpSqrtFloat32x8
        OpSubFloat32x8
+       OpTruncFloat32x8
        OpXorFloat32x8
        OpAddFloat64x2
+       OpAddSubFloat64x2
        OpAndFloat64x2
        OpAndNotFloat64x2
        OpApproximateReciprocalFloat64x2
        OpApproximateReciprocalOfSqrtFloat64x2
+       OpCeilFloat64x2
        OpDivFloat64x2
        OpEqualFloat64x2
+       OpFloorFloat64x2
        OpGreaterFloat64x2
        OpGreaterEqualFloat64x2
        OpIsNanFloat64x2
@@ -4193,16 +4238,21 @@ const (
        OpOrFloat64x2
        OpPairwiseAddFloat64x2
        OpPairwiseSubFloat64x2
+       OpRoundFloat64x2
        OpSqrtFloat64x2
        OpSubFloat64x2
+       OpTruncFloat64x2
        OpXorFloat64x2
        OpAddFloat64x4
+       OpAddSubFloat64x4
        OpAndFloat64x4
        OpAndNotFloat64x4
        OpApproximateReciprocalFloat64x4
        OpApproximateReciprocalOfSqrtFloat64x4
+       OpCeilFloat64x4
        OpDivFloat64x4
        OpEqualFloat64x4
+       OpFloorFloat64x4
        OpGreaterFloat64x4
        OpGreaterEqualFloat64x4
        OpIsNanFloat64x4
@@ -4237,8 +4287,10 @@ const (
        OpOrFloat64x4
        OpPairwiseAddFloat64x4
        OpPairwiseSubFloat64x4
+       OpRoundFloat64x4
        OpSqrtFloat64x4
        OpSubFloat64x4
+       OpTruncFloat64x4
        OpXorFloat64x4
        OpAddFloat64x8
        OpAndFloat64x8
@@ -5094,6 +5146,198 @@ const (
        OpSaturatedAddUint8x64
        OpSaturatedSubUint8x64
        OpSubUint8x64
+       OpCeilSuppressExceptionWithPrecisionFloat32x16
+       OpCeilWithPrecisionFloat32x16
+       OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x16
+       OpDiffWithCeilWithPrecisionFloat32x16
+       OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x16
+       OpDiffWithFloorWithPrecisionFloat32x16
+       OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x16
+       OpDiffWithRoundWithPrecisionFloat32x16
+       OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x16
+       OpDiffWithTruncWithPrecisionFloat32x16
+       OpFloorSuppressExceptionWithPrecisionFloat32x16
+       OpFloorWithPrecisionFloat32x16
+       OpMaskedCeilSuppressExceptionWithPrecisionFloat32x16
+       OpMaskedCeilWithPrecisionFloat32x16
+       OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16
+       OpMaskedDiffWithCeilWithPrecisionFloat32x16
+       OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16
+       OpMaskedDiffWithFloorWithPrecisionFloat32x16
+       OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16
+       OpMaskedDiffWithRoundWithPrecisionFloat32x16
+       OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16
+       OpMaskedDiffWithTruncWithPrecisionFloat32x16
+       OpMaskedFloorSuppressExceptionWithPrecisionFloat32x16
+       OpMaskedFloorWithPrecisionFloat32x16
+       OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16
+       OpMaskedRoundWithPrecisionFloat32x16
+       OpMaskedTruncSuppressExceptionWithPrecisionFloat32x16
+       OpMaskedTruncWithPrecisionFloat32x16
+       OpRoundSuppressExceptionWithPrecisionFloat32x16
+       OpRoundWithPrecisionFloat32x16
+       OpTruncSuppressExceptionWithPrecisionFloat32x16
+       OpTruncWithPrecisionFloat32x16
+       OpCeilSuppressExceptionWithPrecisionFloat32x4
+       OpCeilWithPrecisionFloat32x4
+       OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x4
+       OpDiffWithCeilWithPrecisionFloat32x4
+       OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x4
+       OpDiffWithFloorWithPrecisionFloat32x4
+       OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x4
+       OpDiffWithRoundWithPrecisionFloat32x4
+       OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x4
+       OpDiffWithTruncWithPrecisionFloat32x4
+       OpFloorSuppressExceptionWithPrecisionFloat32x4
+       OpFloorWithPrecisionFloat32x4
+       OpMaskedCeilSuppressExceptionWithPrecisionFloat32x4
+       OpMaskedCeilWithPrecisionFloat32x4
+       OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4
+       OpMaskedDiffWithCeilWithPrecisionFloat32x4
+       OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4
+       OpMaskedDiffWithFloorWithPrecisionFloat32x4
+       OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4
+       OpMaskedDiffWithRoundWithPrecisionFloat32x4
+       OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4
+       OpMaskedDiffWithTruncWithPrecisionFloat32x4
+       OpMaskedFloorSuppressExceptionWithPrecisionFloat32x4
+       OpMaskedFloorWithPrecisionFloat32x4
+       OpMaskedRoundSuppressExceptionWithPrecisionFloat32x4
+       OpMaskedRoundWithPrecisionFloat32x4
+       OpMaskedTruncSuppressExceptionWithPrecisionFloat32x4
+       OpMaskedTruncWithPrecisionFloat32x4
+       OpRoundSuppressExceptionWithPrecisionFloat32x4
+       OpRoundWithPrecisionFloat32x4
+       OpTruncSuppressExceptionWithPrecisionFloat32x4
+       OpTruncWithPrecisionFloat32x4
+       OpCeilSuppressExceptionWithPrecisionFloat32x8
+       OpCeilWithPrecisionFloat32x8
+       OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x8
+       OpDiffWithCeilWithPrecisionFloat32x8
+       OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x8
+       OpDiffWithFloorWithPrecisionFloat32x8
+       OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x8
+       OpDiffWithRoundWithPrecisionFloat32x8
+       OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x8
+       OpDiffWithTruncWithPrecisionFloat32x8
+       OpFloorSuppressExceptionWithPrecisionFloat32x8
+       OpFloorWithPrecisionFloat32x8
+       OpMaskedCeilSuppressExceptionWithPrecisionFloat32x8
+       OpMaskedCeilWithPrecisionFloat32x8
+       OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8
+       OpMaskedDiffWithCeilWithPrecisionFloat32x8
+       OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8
+       OpMaskedDiffWithFloorWithPrecisionFloat32x8
+       OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8
+       OpMaskedDiffWithRoundWithPrecisionFloat32x8
+       OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8
+       OpMaskedDiffWithTruncWithPrecisionFloat32x8
+       OpMaskedFloorSuppressExceptionWithPrecisionFloat32x8
+       OpMaskedFloorWithPrecisionFloat32x8
+       OpMaskedRoundSuppressExceptionWithPrecisionFloat32x8
+       OpMaskedRoundWithPrecisionFloat32x8
+       OpMaskedTruncSuppressExceptionWithPrecisionFloat32x8
+       OpMaskedTruncWithPrecisionFloat32x8
+       OpRoundSuppressExceptionWithPrecisionFloat32x8
+       OpRoundWithPrecisionFloat32x8
+       OpTruncSuppressExceptionWithPrecisionFloat32x8
+       OpTruncWithPrecisionFloat32x8
+       OpCeilSuppressExceptionWithPrecisionFloat64x2
+       OpCeilWithPrecisionFloat64x2
+       OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x2
+       OpDiffWithCeilWithPrecisionFloat64x2
+       OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x2
+       OpDiffWithFloorWithPrecisionFloat64x2
+       OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x2
+       OpDiffWithRoundWithPrecisionFloat64x2
+       OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x2
+       OpDiffWithTruncWithPrecisionFloat64x2
+       OpFloorSuppressExceptionWithPrecisionFloat64x2
+       OpFloorWithPrecisionFloat64x2
+       OpMaskedCeilSuppressExceptionWithPrecisionFloat64x2
+       OpMaskedCeilWithPrecisionFloat64x2
+       OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2
+       OpMaskedDiffWithCeilWithPrecisionFloat64x2
+       OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2
+       OpMaskedDiffWithFloorWithPrecisionFloat64x2
+       OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2
+       OpMaskedDiffWithRoundWithPrecisionFloat64x2
+       OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2
+       OpMaskedDiffWithTruncWithPrecisionFloat64x2
+       OpMaskedFloorSuppressExceptionWithPrecisionFloat64x2
+       OpMaskedFloorWithPrecisionFloat64x2
+       OpMaskedRoundSuppressExceptionWithPrecisionFloat64x2
+       OpMaskedRoundWithPrecisionFloat64x2
+       OpMaskedTruncSuppressExceptionWithPrecisionFloat64x2
+       OpMaskedTruncWithPrecisionFloat64x2
+       OpRoundSuppressExceptionWithPrecisionFloat64x2
+       OpRoundWithPrecisionFloat64x2
+       OpTruncSuppressExceptionWithPrecisionFloat64x2
+       OpTruncWithPrecisionFloat64x2
+       OpCeilSuppressExceptionWithPrecisionFloat64x4
+       OpCeilWithPrecisionFloat64x4
+       OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x4
+       OpDiffWithCeilWithPrecisionFloat64x4
+       OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x4
+       OpDiffWithFloorWithPrecisionFloat64x4
+       OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x4
+       OpDiffWithRoundWithPrecisionFloat64x4
+       OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x4
+       OpDiffWithTruncWithPrecisionFloat64x4
+       OpFloorSuppressExceptionWithPrecisionFloat64x4
+       OpFloorWithPrecisionFloat64x4
+       OpMaskedCeilSuppressExceptionWithPrecisionFloat64x4
+       OpMaskedCeilWithPrecisionFloat64x4
+       OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4
+       OpMaskedDiffWithCeilWithPrecisionFloat64x4
+       OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4
+       OpMaskedDiffWithFloorWithPrecisionFloat64x4
+       OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4
+       OpMaskedDiffWithRoundWithPrecisionFloat64x4
+       OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4
+       OpMaskedDiffWithTruncWithPrecisionFloat64x4
+       OpMaskedFloorSuppressExceptionWithPrecisionFloat64x4
+       OpMaskedFloorWithPrecisionFloat64x4
+       OpMaskedRoundSuppressExceptionWithPrecisionFloat64x4
+       OpMaskedRoundWithPrecisionFloat64x4
+       OpMaskedTruncSuppressExceptionWithPrecisionFloat64x4
+       OpMaskedTruncWithPrecisionFloat64x4
+       OpRoundSuppressExceptionWithPrecisionFloat64x4
+       OpRoundWithPrecisionFloat64x4
+       OpTruncSuppressExceptionWithPrecisionFloat64x4
+       OpTruncWithPrecisionFloat64x4
+       OpCeilSuppressExceptionWithPrecisionFloat64x8
+       OpCeilWithPrecisionFloat64x8
+       OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x8
+       OpDiffWithCeilWithPrecisionFloat64x8
+       OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x8
+       OpDiffWithFloorWithPrecisionFloat64x8
+       OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x8
+       OpDiffWithRoundWithPrecisionFloat64x8
+       OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x8
+       OpDiffWithTruncWithPrecisionFloat64x8
+       OpFloorSuppressExceptionWithPrecisionFloat64x8
+       OpFloorWithPrecisionFloat64x8
+       OpMaskedCeilSuppressExceptionWithPrecisionFloat64x8
+       OpMaskedCeilWithPrecisionFloat64x8
+       OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8
+       OpMaskedDiffWithCeilWithPrecisionFloat64x8
+       OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8
+       OpMaskedDiffWithFloorWithPrecisionFloat64x8
+       OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8
+       OpMaskedDiffWithRoundWithPrecisionFloat64x8
+       OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8
+       OpMaskedDiffWithTruncWithPrecisionFloat64x8
+       OpMaskedFloorSuppressExceptionWithPrecisionFloat64x8
+       OpMaskedFloorWithPrecisionFloat64x8
+       OpMaskedRoundSuppressExceptionWithPrecisionFloat64x8
+       OpMaskedRoundWithPrecisionFloat64x8
+       OpMaskedTruncSuppressExceptionWithPrecisionFloat64x8
+       OpMaskedTruncWithPrecisionFloat64x8
+       OpRoundSuppressExceptionWithPrecisionFloat64x8
+       OpRoundWithPrecisionFloat64x8
+       OpTruncSuppressExceptionWithPrecisionFloat64x8
+       OpTruncWithPrecisionFloat64x8
 )
 
 var opcodeTable = [...]opInfo{
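
[Editor's note] In the opInfo entries that follow, the regInfo input/output masks are bitmasks over the amd64 register allocator's numbering; the trailing comments (X0 X1 ... X14, K1 ... K7) are derived from those masks. A quick standalone check, assuming only what the comments themselves imply (X0 at bit 16, K1 at bit 33):

package main

import "fmt"

// regs expands a regalloc bitmask into register names, given the bit
// position of the first register in the family.
func regs(mask uint64, firstBit int, prefix string, firstNum int) []string {
	var out []string
	for b := 0; b < 64; b++ {
		if mask&(1<<b) != 0 {
			out = append(out, fmt.Sprintf("%s%d", prefix, firstNum+b-firstBit))
		}
	}
	return out
}

func main() {
	fmt.Println(regs(2147418112, 16, "X", 0))    // 0x7FFF0000 -> X0 ... X14
	fmt.Println(regs(1090921693184, 33, "K", 1)) // bits 33-39  -> K1 ... K7
}

X15 is deliberately absent from the vector mask: Go's amd64 register ABI reserves it as the fixed zero register.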
@@ -18091,6 +18335,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "VADDSUBPS128",
+               argLen: 2,
+               asm:    x86.AVADDSUBPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VANDPS128",
                argLen:      2,
@@ -18506,6 +18764,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "VADDSUBPS256",
+               argLen: 2,
+               asm:    x86.AVADDSUBPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VANDPS256",
                argLen:      2,
@@ -18921,6 +19193,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "VADDSUBPD128",
+               argLen: 2,
+               asm:    x86.AVADDSUBPD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VANDPD128",
                argLen:      2,
@@ -19336,6 +19622,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "VADDSUBPD256",
+               argLen: 2,
+               asm:    x86.AVADDSUBPD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VANDPD256",
                argLen:      2,
@@ -25772,6 +26072,34 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VRNDSCALEPS512",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVRNDSCALEPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VREDUCEPS512",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVREDUCEPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VCMPPS512",
                auxType:     auxInt8,
@@ -25788,6 +26116,36 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VRNDSCALEPSMasked512",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVRNDSCALEPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VREDUCEPSMasked512",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVREDUCEPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VCMPPSMasked512",
                auxType:     auxInt8,
@@ -25805,6 +26163,48 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VROUNDPS128",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVROUNDPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VRNDSCALEPS128",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVRNDSCALEPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VREDUCEPS128",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVREDUCEPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VCMPPS128",
                auxType:     auxInt8,
@@ -25821,6 +26221,36 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VRNDSCALEPSMasked128",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVRNDSCALEPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VREDUCEPSMasked128",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVREDUCEPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VCMPPSMasked128",
                auxType:     auxInt8,
@@ -25838,6 +26268,48 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VROUNDPS256",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVROUNDPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VRNDSCALEPS256",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVRNDSCALEPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VREDUCEPS256",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVREDUCEPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VCMPPS256",
                auxType:     auxInt8,
@@ -25854,6 +26326,36 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VRNDSCALEPSMasked256",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVRNDSCALEPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VREDUCEPSMasked256",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVREDUCEPS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VCMPPSMasked256",
                auxType:     auxInt8,
@@ -25871,6 +26373,48 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VROUNDPD128",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVROUNDPD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VRNDSCALEPD128",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVRNDSCALEPD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VREDUCEPD128",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVREDUCEPD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VCMPPD128",
                auxType:     auxInt8,
@@ -25887,6 +26431,36 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VRNDSCALEPDMasked128",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVRNDSCALEPD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VREDUCEPDMasked128",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVREDUCEPD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VCMPPDMasked128",
                auxType:     auxInt8,
@@ -25904,6 +26478,48 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VROUNDPD256",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVROUNDPD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VRNDSCALEPD256",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVRNDSCALEPD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VREDUCEPD256",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVREDUCEPD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VCMPPD256",
                auxType:     auxInt8,
@@ -25920,6 +26536,36 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VRNDSCALEPDMasked256",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVRNDSCALEPD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VREDUCEPDMasked256",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVREDUCEPD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VCMPPDMasked256",
                auxType:     auxInt8,
@@ -25937,6 +26583,34 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VRNDSCALEPD512",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVRNDSCALEPD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VREDUCEPD512",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVREDUCEPD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VCMPPD512",
                auxType:     auxInt8,
@@ -25953,6 +26627,36 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VRNDSCALEPDMasked512",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVRNDSCALEPD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VREDUCEPDMasked512",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVREDUCEPD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 1090921693184}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},    // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VCMPPDMasked512",
                auxType:     auxInt8,
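
[Editor's note] All of the ops added above carry their x86 imm8 in AuxInt (auxType: auxInt8). For VROUNDP* the low two bits select the rounding mode; for VRNDSCALEP*/VREDUCEP* the Intel encoding additionally packs the precision: imm8[1:0] = mode (00 nearest, 01 floor, 10 ceil, 11 trunc), imm8[2] = take the mode from MXCSR instead, imm8[3] = suppress precision exceptions (the SuppressException op variants), and imm8[7:4] = M, rounding to a multiple of 2^-M (the WithPrecision variants). A small sketch with illustrative helper names, not code from this CL:

package main

import "fmt"

const (
	roundNearest int8 = 0x0 // imm8[1:0] = 00
	roundFloor   int8 = 0x1 // imm8[1:0] = 01
	roundCeil    int8 = 0x2 // imm8[1:0] = 10
	roundTrunc   int8 = 0x3 // imm8[1:0] = 11
	suppressPE   int8 = 0x8 // imm8[3]: no precision exception
)

// rndscaleImm builds the imm8 for VRNDSCALE/VREDUCE. AuxInt8 is an
// int8 in the compiler, so the value wraps for prec >= 8; the
// instruction only reads the low 8 bits.
func rndscaleImm(mode, prec int8, suppress bool) int8 {
	imm := mode | prec<<4 // imm8[7:4] = M: round to a multiple of 2^-M
	if suppress {
		imm |= suppressPE
	}
	return imm
}

func main() {
	// Ceil, suppressed exceptions, 4 fraction bits: imm8 = 0x4a.
	fmt.Printf("%#x\n", uint8(rndscaleImm(roundCeil, 4, true)))
}

VREDUCE returns the fraction left over after rounding (src minus the rounded value), which is why the generic ops that lower to it are named DiffWith*.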
@@ -54128,6 +54832,11 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
+       {
+               name:    "AddSubFloat32x4",
+               argLen:  2,
+               generic: true,
+       },
        {
                name:        "AndFloat32x4",
                argLen:      2,
@@ -54150,6 +54859,11 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
+       {
+               name:    "CeilFloat32x4",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "DivFloat32x4",
                argLen:  2,
@@ -54161,6 +54875,11 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
+       {
+               name:    "FloorFloat32x4",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "GreaterFloat32x4",
                argLen:  2,
@@ -54348,6 +55067,11 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "RoundFloat32x4",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "SqrtFloat32x4",
                argLen:  1,
@@ -54358,6 +55082,11 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "TruncFloat32x4",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:        "XorFloat32x4",
                argLen:      2,
@@ -54370,6 +55099,11 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
+       {
+               name:    "AddSubFloat32x8",
+               argLen:  2,
+               generic: true,
+       },
        {
                name:        "AndFloat32x8",
                argLen:      2,
@@ -54392,6 +55126,11 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
+       {
+               name:    "CeilFloat32x8",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "DivFloat32x8",
                argLen:  2,
@@ -54403,6 +55142,11 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
+       {
+               name:    "FloorFloat32x8",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "GreaterFloat32x8",
                argLen:  2,
@@ -54590,6 +55334,11 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "RoundFloat32x8",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "SqrtFloat32x8",
                argLen:  1,
@@ -54600,6 +55349,11 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "TruncFloat32x8",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:        "XorFloat32x8",
                argLen:      2,
@@ -54612,6 +55366,11 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
+       {
+               name:    "AddSubFloat64x2",
+               argLen:  2,
+               generic: true,
+       },
        {
                name:        "AndFloat64x2",
                argLen:      2,
@@ -54634,6 +55393,11 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
+       {
+               name:    "CeilFloat64x2",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "DivFloat64x2",
                argLen:  2,
@@ -54645,6 +55409,11 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
+       {
+               name:    "FloorFloat64x2",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "GreaterFloat64x2",
                argLen:  2,
@@ -54832,6 +55601,11 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "RoundFloat64x2",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "SqrtFloat64x2",
                argLen:  1,
@@ -54842,6 +55616,11 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "TruncFloat64x2",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:        "XorFloat64x2",
                argLen:      2,
@@ -54854,6 +55633,11 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
+       {
+               name:    "AddSubFloat64x4",
+               argLen:  2,
+               generic: true,
+       },
        {
                name:        "AndFloat64x4",
                argLen:      2,
@@ -54876,6 +55660,11 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
+       {
+               name:    "CeilFloat64x4",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "DivFloat64x4",
                argLen:  2,
@@ -54887,6 +55676,11 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
+       {
+               name:    "FloorFloat64x4",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "GreaterFloat64x4",
                argLen:  2,
@@ -55074,6 +55868,11 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "RoundFloat64x4",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "SqrtFloat64x4",
                argLen:  1,
@@ -55084,6 +55883,11 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "TruncFloat64x4",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:        "XorFloat64x4",
                argLen:      2,
@@ -59832,6 +60636,1158 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "CeilSuppressExceptionWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "CeilWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithCeilSuppressExceptionWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithCeilWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithFloorSuppressExceptionWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithFloorWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithRoundSuppressExceptionWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithRoundWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithTruncSuppressExceptionWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithTruncWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "FloorSuppressExceptionWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "FloorWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "MaskedCeilSuppressExceptionWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedCeilWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithCeilWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithFloorWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithRoundWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithTruncWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedFloorSuppressExceptionWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedFloorWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedRoundSuppressExceptionWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedRoundWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedTruncSuppressExceptionWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedTruncWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "RoundSuppressExceptionWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "RoundWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "TruncSuppressExceptionWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "TruncWithPrecisionFloat32x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "CeilSuppressExceptionWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "CeilWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithCeilSuppressExceptionWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithCeilWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithFloorSuppressExceptionWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithFloorWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithRoundSuppressExceptionWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithRoundWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithTruncSuppressExceptionWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithTruncWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "FloorSuppressExceptionWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "FloorWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "MaskedCeilSuppressExceptionWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedCeilWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithCeilWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithFloorWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithRoundWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithTruncWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedFloorSuppressExceptionWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedFloorWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedRoundSuppressExceptionWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedRoundWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedTruncSuppressExceptionWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedTruncWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "RoundSuppressExceptionWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "RoundWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "TruncSuppressExceptionWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "TruncWithPrecisionFloat32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "CeilSuppressExceptionWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "CeilWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithCeilSuppressExceptionWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithCeilWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithFloorSuppressExceptionWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithFloorWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithRoundSuppressExceptionWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithRoundWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithTruncSuppressExceptionWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithTruncWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "FloorSuppressExceptionWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "FloorWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "MaskedCeilSuppressExceptionWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedCeilWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithCeilWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithFloorWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithRoundWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithTruncWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedFloorSuppressExceptionWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedFloorWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedRoundSuppressExceptionWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedRoundWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedTruncSuppressExceptionWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedTruncWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "RoundSuppressExceptionWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "RoundWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "TruncSuppressExceptionWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "TruncWithPrecisionFloat32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "CeilSuppressExceptionWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "CeilWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithCeilSuppressExceptionWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithCeilWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithFloorSuppressExceptionWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithFloorWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithRoundSuppressExceptionWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithRoundWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithTruncSuppressExceptionWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithTruncWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "FloorSuppressExceptionWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "FloorWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "MaskedCeilSuppressExceptionWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedCeilWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithCeilWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithFloorWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithRoundWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithTruncWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedFloorSuppressExceptionWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedFloorWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedRoundSuppressExceptionWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedRoundWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedTruncSuppressExceptionWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedTruncWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "RoundSuppressExceptionWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "RoundWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "TruncSuppressExceptionWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "TruncWithPrecisionFloat64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "CeilSuppressExceptionWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "CeilWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithCeilSuppressExceptionWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithCeilWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithFloorSuppressExceptionWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithFloorWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithRoundSuppressExceptionWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithRoundWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithTruncSuppressExceptionWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithTruncWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "FloorSuppressExceptionWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "FloorWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "MaskedCeilSuppressExceptionWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedCeilWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithCeilWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithFloorWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithRoundWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithTruncWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedFloorSuppressExceptionWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedFloorWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedRoundSuppressExceptionWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedRoundWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedTruncSuppressExceptionWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedTruncWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "RoundSuppressExceptionWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "RoundWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "TruncSuppressExceptionWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "TruncWithPrecisionFloat64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "CeilSuppressExceptionWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "CeilWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithCeilSuppressExceptionWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithCeilWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithFloorSuppressExceptionWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithFloorWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithRoundSuppressExceptionWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithRoundWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithTruncSuppressExceptionWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "DiffWithTruncWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "FloorSuppressExceptionWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "FloorWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "MaskedCeilSuppressExceptionWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedCeilWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithCeilWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithFloorWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithRoundWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedDiffWithTruncWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedFloorSuppressExceptionWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedFloorWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedRoundSuppressExceptionWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedRoundWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedTruncSuppressExceptionWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "MaskedTruncWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "RoundSuppressExceptionWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "RoundWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "TruncSuppressExceptionWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "TruncWithPrecisionFloat64x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
 }
 
 func (o Op) Asm() obj.As          { return opcodeTable[o].asm }
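Taken together, the entries added above extend the generic (machine-independent) side of opcodeTable in opGen.go: name identifies the SSA op, argLen counts SSA arguments (the unary rounding ops take 1, binary ops such as AddSub take 2, and the Masked variants take one extra argument for the mask), auxType: auxInt8 marks the precision-controlled ops whose precision travels as an 8-bit immediate in Value.AuxInt, and generic: true means the op must still be lowered to a concrete AMD64 op before code generation. A minimal, runnable sketch of that record shape, assuming a trimmed-down opInfo struct (the real one also carries asm, register constraints, and other fields):

package main

import "fmt"

type auxType int

const (
	auxNone auxType = iota
	auxInt8         // 8-bit immediate carried in Value.AuxInt
)

type opInfo struct {
	name    string
	auxType auxType
	argLen  int32 // number of SSA arguments; masked variants take one extra (the mask)
	generic bool  // machine-independent op, must be lowered before codegen
}

var table = []opInfo{
	{name: "RoundFloat32x8", argLen: 1, generic: true},
	{name: "RoundWithPrecisionFloat32x8", auxType: auxInt8, argLen: 1, generic: true},
	{name: "MaskedRoundWithPrecisionFloat32x8", auxType: auxInt8, argLen: 2, generic: true},
}

func main() {
	for _, op := range table {
		fmt.Printf("%-42s args=%d imm8=%v\n", op.name, op.argLen, op.auxType == auxInt8)
	}
}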
src/cmd/compile/internal/ssa/rewriteAMD64.go
index 86fbc988cfad2f006df015f9645610d926afa689..a6cf0a0b7bd0b10c035a22f42fc7012651ee6755 100644 (file)
@@ -664,6 +664,18 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAddPtr:
                v.Op = OpAMD64ADDQ
                return true
+       case OpAddSubFloat32x4:
+               v.Op = OpAMD64VADDSUBPS128
+               return true
+       case OpAddSubFloat32x8:
+               v.Op = OpAMD64VADDSUBPS256
+               return true
+       case OpAddSubFloat64x2:
+               v.Op = OpAMD64VADDSUBPD128
+               return true
+       case OpAddSubFloat64x4:
+               v.Op = OpAMD64VADDSUBPD256
+               return true
        case OpAddUint16x16:
                v.Op = OpAMD64VPADDW256
                return true
@@ -994,6 +1006,38 @@ func rewriteValueAMD64(v *Value) bool {
                return true
        case OpCeil:
                return rewriteValueAMD64_OpCeil(v)
+       case OpCeilFloat32x4:
+               return rewriteValueAMD64_OpCeilFloat32x4(v)
+       case OpCeilFloat32x8:
+               return rewriteValueAMD64_OpCeilFloat32x8(v)
+       case OpCeilFloat64x2:
+               return rewriteValueAMD64_OpCeilFloat64x2(v)
+       case OpCeilFloat64x4:
+               return rewriteValueAMD64_OpCeilFloat64x4(v)
+       case OpCeilSuppressExceptionWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat32x16(v)
+       case OpCeilSuppressExceptionWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat32x4(v)
+       case OpCeilSuppressExceptionWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat32x8(v)
+       case OpCeilSuppressExceptionWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat64x2(v)
+       case OpCeilSuppressExceptionWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat64x4(v)
+       case OpCeilSuppressExceptionWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat64x8(v)
+       case OpCeilWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpCeilWithPrecisionFloat32x16(v)
+       case OpCeilWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpCeilWithPrecisionFloat32x4(v)
+       case OpCeilWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpCeilWithPrecisionFloat32x8(v)
+       case OpCeilWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpCeilWithPrecisionFloat64x2(v)
+       case OpCeilWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpCeilWithPrecisionFloat64x4(v)
+       case OpCeilWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpCeilWithPrecisionFloat64x8(v)
        case OpClosureCall:
                v.Op = OpAMD64CALLclosure
                return true
@@ -1080,6 +1124,102 @@ func rewriteValueAMD64(v *Value) bool {
        case OpCvtBoolToUint8:
                v.Op = OpCopy
                return true
+       case OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x16(v)
+       case OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x4(v)
+       case OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x8(v)
+       case OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x2(v)
+       case OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x4(v)
+       case OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x8(v)
+       case OpDiffWithCeilWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x16(v)
+       case OpDiffWithCeilWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x4(v)
+       case OpDiffWithCeilWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x8(v)
+       case OpDiffWithCeilWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x2(v)
+       case OpDiffWithCeilWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x4(v)
+       case OpDiffWithCeilWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x8(v)
+       case OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x16(v)
+       case OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x4(v)
+       case OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x8(v)
+       case OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x2(v)
+       case OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x4(v)
+       case OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x8(v)
+       case OpDiffWithFloorWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x16(v)
+       case OpDiffWithFloorWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x4(v)
+       case OpDiffWithFloorWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x8(v)
+       case OpDiffWithFloorWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x2(v)
+       case OpDiffWithFloorWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x4(v)
+       case OpDiffWithFloorWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x8(v)
+       case OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x16(v)
+       case OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x4(v)
+       case OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x8(v)
+       case OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x2(v)
+       case OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x4(v)
+       case OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x8(v)
+       case OpDiffWithRoundWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x16(v)
+       case OpDiffWithRoundWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x4(v)
+       case OpDiffWithRoundWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x8(v)
+       case OpDiffWithRoundWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x2(v)
+       case OpDiffWithRoundWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x4(v)
+       case OpDiffWithRoundWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x8(v)
+       case OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x16(v)
+       case OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x4(v)
+       case OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x8(v)
+       case OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x2(v)
+       case OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x4(v)
+       case OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x8(v)
+       case OpDiffWithTruncWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x16(v)
+       case OpDiffWithTruncWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x4(v)
+       case OpDiffWithTruncWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x8(v)
+       case OpDiffWithTruncWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x2(v)
+       case OpDiffWithTruncWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x4(v)
+       case OpDiffWithTruncWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x8(v)
        case OpDiv128u:
                v.Op = OpAMD64DIVQU2
                return true
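The precision-controlled ops dispatched above cannot be lowered by a plain re-tag, because their AuxInt has to be folded into the instruction's imm8: in the VRNDSCALE and VREDUCE encodings, imm8 bits 7:4 carry the number of fraction bits to keep, bit 3 suppresses precision exceptions, and bits 1:0 select the rounding mode (0 nearest, 1 floor, 2 ceil, 3 trunc). Each op therefore gets its own generated rewriteValueAMD64_Op... function. Below is a hypothetical, self-contained sketch of one such body; the exact immediate arithmetic used by the generated rules is an assumption here, as are the stand-in types:

package main

import "fmt"

type Op int32

const (
	OpCeilWithPrecisionFloat32x16 Op = iota
	OpAMD64VRNDSCALEPS512
)

type Value struct {
	Op     Op
	AuxInt int64
	Args   []*Value
}

func (v *Value) reset(op Op)     { v.Op = op; v.Args = v.Args[:0] }
func (v *Value) AddArg(w *Value) { v.Args = append(v.Args, w) }

// match:  (CeilWithPrecisionFloat32x16 [a] x)
// result: (VRNDSCALEPS512 [a<<4|2] x)
// Assumption: a is the precision (fraction bits to keep), packed into the
// high nibble; mode 2 in the low bits rounds toward +Inf (ceil).
func rewriteOpCeilWithPrecisionFloat32x16(v *Value) bool {
	x := v.Args[0]
	a := int8(v.AuxInt)
	v.reset(OpAMD64VRNDSCALEPS512)
	v.AuxInt = int64(a<<4 | 2)
	v.AddArg(x)
	return true
}

func main() {
	v := &Value{Op: OpCeilWithPrecisionFloat32x16, AuxInt: 3, Args: []*Value{{}}}
	rewriteOpCeilWithPrecisionFloat32x16(v)
	fmt.Printf("imm=%#b\n", v.AuxInt) // imm=0b110010: keep 3 fraction bits, round up
}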
@@ -1211,6 +1351,38 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpFMA(v)
        case OpFloor:
                return rewriteValueAMD64_OpFloor(v)
+       case OpFloorFloat32x4:
+               return rewriteValueAMD64_OpFloorFloat32x4(v)
+       case OpFloorFloat32x8:
+               return rewriteValueAMD64_OpFloorFloat32x8(v)
+       case OpFloorFloat64x2:
+               return rewriteValueAMD64_OpFloorFloat64x2(v)
+       case OpFloorFloat64x4:
+               return rewriteValueAMD64_OpFloorFloat64x4(v)
+       case OpFloorSuppressExceptionWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat32x16(v)
+       case OpFloorSuppressExceptionWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat32x4(v)
+       case OpFloorSuppressExceptionWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat32x8(v)
+       case OpFloorSuppressExceptionWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat64x2(v)
+       case OpFloorSuppressExceptionWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat64x4(v)
+       case OpFloorSuppressExceptionWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat64x8(v)
+       case OpFloorWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpFloorWithPrecisionFloat32x16(v)
+       case OpFloorWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpFloorWithPrecisionFloat32x4(v)
+       case OpFloorWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpFloorWithPrecisionFloat32x8(v)
+       case OpFloorWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpFloorWithPrecisionFloat64x2(v)
+       case OpFloorWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpFloorWithPrecisionFloat64x4(v)
+       case OpFloorWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpFloorWithPrecisionFloat64x8(v)
        case OpGetCallerPC:
                v.Op = OpAMD64LoweredGetCallerPC
                return true
@@ -1772,6 +1944,126 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpMaskedAverageUint8x32(v)
        case OpMaskedAverageUint8x64:
                return rewriteValueAMD64_OpMaskedAverageUint8x64(v)
+       case OpMaskedCeilSuppressExceptionWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat32x16(v)
+       case OpMaskedCeilSuppressExceptionWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat32x4(v)
+       case OpMaskedCeilSuppressExceptionWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat32x8(v)
+       case OpMaskedCeilSuppressExceptionWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat64x2(v)
+       case OpMaskedCeilSuppressExceptionWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat64x4(v)
+       case OpMaskedCeilSuppressExceptionWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat64x8(v)
+       case OpMaskedCeilWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x16(v)
+       case OpMaskedCeilWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x4(v)
+       case OpMaskedCeilWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x8(v)
+       case OpMaskedCeilWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x2(v)
+       case OpMaskedCeilWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x4(v)
+       case OpMaskedCeilWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x8(v)
+       case OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16(v)
+       case OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4(v)
+       case OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8(v)
+       case OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2(v)
+       case OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4(v)
+       case OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8(v)
+       case OpMaskedDiffWithCeilWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x16(v)
+       case OpMaskedDiffWithCeilWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x4(v)
+       case OpMaskedDiffWithCeilWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x8(v)
+       case OpMaskedDiffWithCeilWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x2(v)
+       case OpMaskedDiffWithCeilWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x4(v)
+       case OpMaskedDiffWithCeilWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x8(v)
+       case OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16(v)
+       case OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4(v)
+       case OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8(v)
+       case OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2(v)
+       case OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4(v)
+       case OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8(v)
+       case OpMaskedDiffWithFloorWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x16(v)
+       case OpMaskedDiffWithFloorWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x4(v)
+       case OpMaskedDiffWithFloorWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x8(v)
+       case OpMaskedDiffWithFloorWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x2(v)
+       case OpMaskedDiffWithFloorWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x4(v)
+       case OpMaskedDiffWithFloorWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x8(v)
+       case OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16(v)
+       case OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4(v)
+       case OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8(v)
+       case OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2(v)
+       case OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4(v)
+       case OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8(v)
+       case OpMaskedDiffWithRoundWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x16(v)
+       case OpMaskedDiffWithRoundWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x4(v)
+       case OpMaskedDiffWithRoundWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x8(v)
+       case OpMaskedDiffWithRoundWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x2(v)
+       case OpMaskedDiffWithRoundWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x4(v)
+       case OpMaskedDiffWithRoundWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x8(v)
+       case OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16(v)
+       case OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4(v)
+       case OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8(v)
+       case OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2(v)
+       case OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4(v)
+       case OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8(v)
+       case OpMaskedDiffWithTruncWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x16(v)
+       case OpMaskedDiffWithTruncWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x4(v)
+       case OpMaskedDiffWithTruncWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x8(v)
+       case OpMaskedDiffWithTruncWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x2(v)
+       case OpMaskedDiffWithTruncWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x4(v)
+       case OpMaskedDiffWithTruncWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x8(v)
        case OpMaskedDivFloat32x16:
                return rewriteValueAMD64_OpMaskedDivFloat32x16(v)
        case OpMaskedDivFloat32x4:
@@ -1844,6 +2136,30 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpMaskedEqualUint8x32(v)
        case OpMaskedEqualUint8x64:
                return rewriteValueAMD64_OpMaskedEqualUint8x64(v)
+       case OpMaskedFloorSuppressExceptionWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat32x16(v)
+       case OpMaskedFloorSuppressExceptionWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat32x4(v)
+       case OpMaskedFloorSuppressExceptionWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat32x8(v)
+       case OpMaskedFloorSuppressExceptionWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat64x2(v)
+       case OpMaskedFloorSuppressExceptionWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat64x4(v)
+       case OpMaskedFloorSuppressExceptionWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat64x8(v)
+       case OpMaskedFloorWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x16(v)
+       case OpMaskedFloorWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x4(v)
+       case OpMaskedFloorWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x8(v)
+       case OpMaskedFloorWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x2(v)
+       case OpMaskedFloorWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x4(v)
+       case OpMaskedFloorWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x8(v)
        case OpMaskedGreaterEqualFloat32x16:
                return rewriteValueAMD64_OpMaskedGreaterEqualFloat32x16(v)
        case OpMaskedGreaterEqualFloat32x4:
@@ -2426,6 +2742,30 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpMaskedPopCountUint8x32(v)
        case OpMaskedPopCountUint8x64:
                return rewriteValueAMD64_OpMaskedPopCountUint8x64(v)
+       case OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16(v)
+       case OpMaskedRoundSuppressExceptionWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat32x4(v)
+       case OpMaskedRoundSuppressExceptionWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat32x8(v)
+       case OpMaskedRoundSuppressExceptionWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat64x2(v)
+       case OpMaskedRoundSuppressExceptionWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat64x4(v)
+       case OpMaskedRoundSuppressExceptionWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat64x8(v)
+       case OpMaskedRoundWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x16(v)
+       case OpMaskedRoundWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x4(v)
+       case OpMaskedRoundWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x8(v)
+       case OpMaskedRoundWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x2(v)
+       case OpMaskedRoundWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x4(v)
+       case OpMaskedRoundWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x8(v)
        case OpMaskedSaturatedAddInt16x16:
                return rewriteValueAMD64_OpMaskedSaturatedAddInt16x16(v)
        case OpMaskedSaturatedAddInt16x32:
@@ -2546,6 +2886,30 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpMaskedSubUint8x32(v)
        case OpMaskedSubUint8x64:
                return rewriteValueAMD64_OpMaskedSubUint8x64(v)
+       case OpMaskedTruncSuppressExceptionWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat32x16(v)
+       case OpMaskedTruncSuppressExceptionWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat32x4(v)
+       case OpMaskedTruncSuppressExceptionWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat32x8(v)
+       case OpMaskedTruncSuppressExceptionWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat64x2(v)
+       case OpMaskedTruncSuppressExceptionWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat64x4(v)
+       case OpMaskedTruncSuppressExceptionWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat64x8(v)
+       case OpMaskedTruncWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x16(v)
+       case OpMaskedTruncWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x4(v)
+       case OpMaskedTruncWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x8(v)
+       case OpMaskedTruncWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x2(v)
+       case OpMaskedTruncWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x4(v)
+       case OpMaskedTruncWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x8(v)
        case OpMaskedXorFloat32x16:
                return rewriteValueAMD64_OpMaskedXorFloat32x16(v)
        case OpMaskedXorFloat32x4:
@@ -3292,8 +3656,40 @@ func rewriteValueAMD64(v *Value) bool {
        case OpRound64F:
                v.Op = OpAMD64LoweredRound64F
                return true
+       case OpRoundFloat32x4:
+               return rewriteValueAMD64_OpRoundFloat32x4(v)
+       case OpRoundFloat32x8:
+               return rewriteValueAMD64_OpRoundFloat32x8(v)
+       case OpRoundFloat64x2:
+               return rewriteValueAMD64_OpRoundFloat64x2(v)
+       case OpRoundFloat64x4:
+               return rewriteValueAMD64_OpRoundFloat64x4(v)
+       case OpRoundSuppressExceptionWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat32x16(v)
+       case OpRoundSuppressExceptionWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat32x4(v)
+       case OpRoundSuppressExceptionWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat32x8(v)
+       case OpRoundSuppressExceptionWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat64x2(v)
+       case OpRoundSuppressExceptionWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat64x4(v)
+       case OpRoundSuppressExceptionWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat64x8(v)
        case OpRoundToEven:
                return rewriteValueAMD64_OpRoundToEven(v)
+       case OpRoundWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpRoundWithPrecisionFloat32x16(v)
+       case OpRoundWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpRoundWithPrecisionFloat32x4(v)
+       case OpRoundWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpRoundWithPrecisionFloat32x8(v)
+       case OpRoundWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpRoundWithPrecisionFloat64x2(v)
+       case OpRoundWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpRoundWithPrecisionFloat64x4(v)
+       case OpRoundWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpRoundWithPrecisionFloat64x8(v)
        case OpRsh16Ux16:
                return rewriteValueAMD64_OpRsh16Ux16(v)
        case OpRsh16Ux32:
@@ -3653,6 +4049,38 @@ func rewriteValueAMD64(v *Value) bool {
        case OpTrunc64to8:
                v.Op = OpCopy
                return true
+       case OpTruncFloat32x4:
+               return rewriteValueAMD64_OpTruncFloat32x4(v)
+       case OpTruncFloat32x8:
+               return rewriteValueAMD64_OpTruncFloat32x8(v)
+       case OpTruncFloat64x2:
+               return rewriteValueAMD64_OpTruncFloat64x2(v)
+       case OpTruncFloat64x4:
+               return rewriteValueAMD64_OpTruncFloat64x4(v)
+       case OpTruncSuppressExceptionWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat32x16(v)
+       case OpTruncSuppressExceptionWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat32x4(v)
+       case OpTruncSuppressExceptionWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat32x8(v)
+       case OpTruncSuppressExceptionWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat64x2(v)
+       case OpTruncSuppressExceptionWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat64x4(v)
+       case OpTruncSuppressExceptionWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat64x8(v)
+       case OpTruncWithPrecisionFloat32x16:
+               return rewriteValueAMD64_OpTruncWithPrecisionFloat32x16(v)
+       case OpTruncWithPrecisionFloat32x4:
+               return rewriteValueAMD64_OpTruncWithPrecisionFloat32x4(v)
+       case OpTruncWithPrecisionFloat32x8:
+               return rewriteValueAMD64_OpTruncWithPrecisionFloat32x8(v)
+       case OpTruncWithPrecisionFloat64x2:
+               return rewriteValueAMD64_OpTruncWithPrecisionFloat64x2(v)
+       case OpTruncWithPrecisionFloat64x4:
+               return rewriteValueAMD64_OpTruncWithPrecisionFloat64x4(v)
+       case OpTruncWithPrecisionFloat64x8:
+               return rewriteValueAMD64_OpTruncWithPrecisionFloat64x8(v)
        case OpWB:
                v.Op = OpAMD64LoweredWB
                return true
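The dispatch cases above are mechanically regular: each new generic rounding op gets one case that delegates to a dedicated rewrite function, and the op names compose as an optional Masked or DiffWith prefix, a rounding mode, a variant, and a lane shape. A stand-alone sketch of that naming grid, purely illustrative and not the real code generator behind this file:

package main

import "fmt"

// Illustrative only: reproduce the naming grid of the generated case
// list for one family. Not every combination exists in the compiler;
// for example, the plain (no-variant) forms cover only the 128- and
// 256-bit shapes that VROUNDPS/VROUNDPD can encode.
func main() {
        modes := []string{"Ceil", "Floor", "Round", "Trunc"}
        variants := []string{"WithPrecision", "SuppressExceptionWithPrecision"}
        shapes := []string{"Float32x4", "Float32x8", "Float32x16",
                "Float64x2", "Float64x4", "Float64x8"}
        for _, m := range modes {
                for _, va := range variants {
                        for _, s := range shapes {
                                fmt.Printf("OpMaskedDiffWith%s%s%s\n", m, va, s)
                        }
                }
        }
}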
@@ -27029,6 +27457,210 @@ func rewriteValueAMD64_OpCeil(v *Value) bool {
                return true
        }
 }
+func rewriteValueAMD64_OpCeilFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilFloat32x4 x)
+       // result: (VROUNDPS128 [2] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VROUNDPS128)
+               v.AuxInt = int8ToAuxInt(2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilFloat32x8 x)
+       // result: (VROUNDPS256 [2] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VROUNDPS256)
+               v.AuxInt = int8ToAuxInt(2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilFloat64x2 x)
+       // result: (VROUNDPD128 [2] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VROUNDPD128)
+               v.AuxInt = int8ToAuxInt(2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilFloat64x4 x)
+       // result: (VROUNDPD256 [2] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VROUNDPD256)
+               v.AuxInt = int8ToAuxInt(2)
+               v.AddArg(x)
+               return true
+       }
+}
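For the unmasked 128/256-bit lowerings above, the immediate passed to VROUNDPS/VROUNDPD is the SSE4.1 rounding-control byte: bits 1:0 pick the mode (0 nearest even, 1 toward negative infinity, 2 toward positive infinity, 3 toward zero), which is why Ceil uses [2] and Floor, further down, uses [1]. A runnable scalar model of that encoding, with illustrative names:

package main

import (
        "fmt"
        "math"
)

// One-lane model of the VROUNDPS/VROUNDPD immediate's low two bits.
const (
        roundNearest = 0
        roundFloor   = 1
        roundCeil    = 2
        roundTrunc   = 3
)

// scalarRound mimics what a single lane computes for the given
// immediate when bit 2 (use MXCSR) is clear.
func scalarRound(x float64, imm int) float64 {
        switch imm & 3 {
        case roundNearest:
                return math.RoundToEven(x)
        case roundFloor:
                return math.Floor(x)
        case roundCeil:
                return math.Ceil(x)
        default:
                return math.Trunc(x)
        }
}

func main() {
        fmt.Println(scalarRound(-2.5, roundCeil))  // -2, matching (VROUNDPD128 [2] x)
        fmt.Println(scalarRound(-2.5, roundFloor)) // -3, matching (VROUNDPD128 [1] x)
}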
+func rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilSuppressExceptionWithPrecisionFloat32x16 [a] x)
+       // result: (VRNDSCALEPS512 [a+10] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS512)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilSuppressExceptionWithPrecisionFloat32x4 [a] x)
+       // result: (VRNDSCALEPS128 [a+10] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS128)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilSuppressExceptionWithPrecisionFloat32x8 [a] x)
+       // result: (VRNDSCALEPS256 [a+10] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS256)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilSuppressExceptionWithPrecisionFloat64x2 [a] x)
+       // result: (VRNDSCALEPD128 [a+10] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD128)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilSuppressExceptionWithPrecisionFloat64x4 [a] x)
+       // result: (VRNDSCALEPD256 [a+10] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD256)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilSuppressExceptionWithPrecisionFloat64x8 [a] x)
+       // result: (VRNDSCALEPD512 [a+10] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD512)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v.AddArg(x)
+               return true
+       }
+}
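The SuppressException variants add 10 rather than 2 because the VRNDSCALE immediate packs several fields: bits 1:0 are the round control, bit 3 (value 8) suppresses the precision exception, and bits 7:4 hold M, the number of fraction bits to keep. For a+10 to be well formed, the AuxInt a presumably carries the precision already shifted into the high nibble; that reading, and the helper below, are a sketch, not the compiler's actual code.

package main

import "fmt"

const (
        rcCeil     = 2 // imm8[1:0] = 10b, round toward +Inf
        suppressPE = 8 // imm8[3], suppress the precision exception
)

// rndscaleImm composes a VRNDSCALE immediate under the assumption
// described above; the name and signature are hypothetical.
func rndscaleImm(precision uint8, mode int, suppress bool) int8 {
        imm := int(precision)<<4 | mode // imm8[7:4] = M, imm8[1:0] = round control
        if suppress {
                imm |= suppressPE
        }
        return int8(imm)
}

func main() {
        a := int8(3) << 4 // AuxInt for "precision 3" under the assumption above
        fmt.Println(a+10 == rndscaleImm(3, rcCeil, true)) // true
}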
+func rewriteValueAMD64_OpCeilWithPrecisionFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilWithPrecisionFloat32x16 [a] x)
+       // result: (VRNDSCALEPS512 [a+2] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS512)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilWithPrecisionFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilWithPrecisionFloat32x4 [a] x)
+       // result: (VRNDSCALEPS128 [a+2] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS128)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilWithPrecisionFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilWithPrecisionFloat32x8 [a] x)
+       // result: (VRNDSCALEPS256 [a+2] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS256)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilWithPrecisionFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilWithPrecisionFloat64x2 [a] x)
+       // result: (VRNDSCALEPD128 [a+2] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD128)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilWithPrecisionFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilWithPrecisionFloat64x4 [a] x)
+       // result: (VRNDSCALEPD256 [a+2] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD256)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilWithPrecisionFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilWithPrecisionFloat64x8 [a] x)
+       // result: (VRNDSCALEPD512 [a+2] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD512)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueAMD64_OpCondSelect(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
@@ -28162,6 +28794,630 @@ func rewriteValueAMD64_OpCtz8NonZero(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithCeilSuppressExceptionWithPrecisionFloat32x16 [a] x)
+       // result: (VREDUCEPS512 [a+10] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS512)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithCeilSuppressExceptionWithPrecisionFloat32x4 [a] x)
+       // result: (VREDUCEPS128 [a+10] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS128)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithCeilSuppressExceptionWithPrecisionFloat32x8 [a] x)
+       // result: (VREDUCEPS256 [a+10] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS256)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithCeilSuppressExceptionWithPrecisionFloat64x2 [a] x)
+       // result: (VREDUCEPD128 [a+10] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD128)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithCeilSuppressExceptionWithPrecisionFloat64x4 [a] x)
+       // result: (VREDUCEPD256 [a+10] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD256)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithCeilSuppressExceptionWithPrecisionFloat64x8 [a] x)
+       // result: (VREDUCEPD512 [a+10] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD512)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithCeilWithPrecisionFloat32x16 [a] x)
+       // result: (VREDUCEPS512 [a+2] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS512)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithCeilWithPrecisionFloat32x4 [a] x)
+       // result: (VREDUCEPS128 [a+2] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS128)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithCeilWithPrecisionFloat32x8 [a] x)
+       // result: (VREDUCEPS256 [a+2] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS256)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithCeilWithPrecisionFloat64x2 [a] x)
+       // result: (VREDUCEPD128 [a+2] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD128)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithCeilWithPrecisionFloat64x4 [a] x)
+       // result: (VREDUCEPD256 [a+2] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD256)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithCeilWithPrecisionFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithCeilWithPrecisionFloat64x8 [a] x)
+       // result: (VREDUCEPD512 [a+2] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD512)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
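The DiffWith family lowers to VREDUCEPS/VREDUCEPD, which per the Intel SDM computes x minus x rounded to the requested number of fraction bits; the immediate offsets mirror the VRNDSCALE table (+0 round, +1 floor, +2 ceil, +3 trunc, plus 8 to suppress the precision exception), as the rules above and below show. A scalar model of one lane, for illustration only:

package main

import (
        "fmt"
        "math"
)

// reduce models one lane of VREDUCEPD: the difference between x and x
// rounded to fracBits fraction bits, hence the generic op names
// DiffWith{Round,Floor,Ceil,Trunc}WithPrecision.
func reduce(x float64, fracBits uint, mode int) float64 {
        scale := math.Ldexp(1, int(fracBits)) // 2^fracBits
        var r float64
        switch mode & 3 {
        case 0:
                r = math.RoundToEven(x*scale) / scale
        case 1:
                r = math.Floor(x*scale) / scale
        case 2:
                r = math.Ceil(x*scale) / scale
        default:
                r = math.Trunc(x*scale) / scale
        }
        return x - r
}

func main() {
        // DiffWithFloorWithPrecision, precision 2: keep 2 fraction bits.
        fmt.Println(reduce(1.30, 2, 1)) // 1.30 - 1.25 = 0.05, up to float error
}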
+func rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithFloorSuppressExceptionWithPrecisionFloat32x16 [a] x)
+       // result: (VREDUCEPS512 [a+9] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS512)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithFloorSuppressExceptionWithPrecisionFloat32x4 [a] x)
+       // result: (VREDUCEPS128 [a+9] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS128)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithFloorSuppressExceptionWithPrecisionFloat32x8 [a] x)
+       // result: (VREDUCEPS256 [a+9] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS256)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithFloorSuppressExceptionWithPrecisionFloat64x2 [a] x)
+       // result: (VREDUCEPD128 [a+9] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD128)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithFloorSuppressExceptionWithPrecisionFloat64x4 [a] x)
+       // result: (VREDUCEPD256 [a+9] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD256)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithFloorSuppressExceptionWithPrecisionFloat64x8 [a] x)
+       // result: (VREDUCEPD512 [a+9] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD512)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithFloorWithPrecisionFloat32x16 [a] x)
+       // result: (VREDUCEPS512 [a+1] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS512)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithFloorWithPrecisionFloat32x4 [a] x)
+       // result: (VREDUCEPS128 [a+1] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS128)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithFloorWithPrecisionFloat32x8 [a] x)
+       // result: (VREDUCEPS256 [a+1] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS256)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithFloorWithPrecisionFloat64x2 [a] x)
+       // result: (VREDUCEPD128 [a+1] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD128)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithFloorWithPrecisionFloat64x4 [a] x)
+       // result: (VREDUCEPD256 [a+1] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD256)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithFloorWithPrecisionFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithFloorWithPrecisionFloat64x8 [a] x)
+       // result: (VREDUCEPD512 [a+1] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD512)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithRoundSuppressExceptionWithPrecisionFloat32x16 [a] x)
+       // result: (VREDUCEPS512 [a+8] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS512)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithRoundSuppressExceptionWithPrecisionFloat32x4 [a] x)
+       // result: (VREDUCEPS128 [a+8] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS128)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithRoundSuppressExceptionWithPrecisionFloat32x8 [a] x)
+       // result: (VREDUCEPS256 [a+8] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS256)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithRoundSuppressExceptionWithPrecisionFloat64x2 [a] x)
+       // result: (VREDUCEPD128 [a+8] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD128)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithRoundSuppressExceptionWithPrecisionFloat64x4 [a] x)
+       // result: (VREDUCEPD256 [a+8] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD256)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithRoundSuppressExceptionWithPrecisionFloat64x8 [a] x)
+       // result: (VREDUCEPD512 [a+8] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD512)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithRoundWithPrecisionFloat32x16 [a] x)
+       // result: (VREDUCEPS512 [a+0] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS512)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithRoundWithPrecisionFloat32x4 [a] x)
+       // result: (VREDUCEPS128 [a+0] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS128)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithRoundWithPrecisionFloat32x8 [a] x)
+       // result: (VREDUCEPS256 [a+0] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS256)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithRoundWithPrecisionFloat64x2 [a] x)
+       // result: (VREDUCEPD128 [a+0] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD128)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithRoundWithPrecisionFloat64x4 [a] x)
+       // result: (VREDUCEPD256 [a+0] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD256)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithRoundWithPrecisionFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithRoundWithPrecisionFloat64x8 [a] x)
+       // result: (VREDUCEPD512 [a+0] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD512)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithTruncSuppressExceptionWithPrecisionFloat32x16 [a] x)
+       // result: (VREDUCEPS512 [a+11] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS512)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithTruncSuppressExceptionWithPrecisionFloat32x4 [a] x)
+       // result: (VREDUCEPS128 [a+11] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS128)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithTruncSuppressExceptionWithPrecisionFloat32x8 [a] x)
+       // result: (VREDUCEPS256 [a+11] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS256)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithTruncSuppressExceptionWithPrecisionFloat64x2 [a] x)
+       // result: (VREDUCEPD128 [a+11] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD128)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithTruncSuppressExceptionWithPrecisionFloat64x4 [a] x)
+       // result: (VREDUCEPD256 [a+11] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD256)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithTruncSuppressExceptionWithPrecisionFloat64x8 [a] x)
+       // result: (VREDUCEPD512 [a+11] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD512)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithTruncWithPrecisionFloat32x16 [a] x)
+       // result: (VREDUCEPS512 [a+3] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS512)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithTruncWithPrecisionFloat32x4 [a] x)
+       // result: (VREDUCEPS128 [a+3] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS128)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithTruncWithPrecisionFloat32x8 [a] x)
+       // result: (VREDUCEPS256 [a+3] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS256)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithTruncWithPrecisionFloat64x2 [a] x)
+       // result: (VREDUCEPD128 [a+3] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD128)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithTruncWithPrecisionFloat64x4 [a] x)
+       // result: (VREDUCEPD256 [a+3] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD256)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpDiffWithTruncWithPrecisionFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (DiffWithTruncWithPrecisionFloat64x8 [a] x)
+       // result: (VREDUCEPD512 [a+3] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD512)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueAMD64_OpDiv16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -28843,6 +30099,210 @@ func rewriteValueAMD64_OpFloor(v *Value) bool {
                return true
        }
 }
+func rewriteValueAMD64_OpFloorFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (FloorFloat32x4 x)
+       // result: (VROUNDPS128 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VROUNDPS128)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpFloorFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (FloorFloat32x8 x)
+       // result: (VROUNDPS256 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VROUNDPS256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpFloorFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (FloorFloat64x2 x)
+       // result: (VROUNDPD128 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VROUNDPD128)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpFloorFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (FloorFloat64x4 x)
+       // result: (VROUNDPD256 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VROUNDPD256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (FloorSuppressExceptionWithPrecisionFloat32x16 [a] x)
+       // result: (VRNDSCALEPS512 [a+9] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS512)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (FloorSuppressExceptionWithPrecisionFloat32x4 [a] x)
+       // result: (VRNDSCALEPS128 [a+9] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS128)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (FloorSuppressExceptionWithPrecisionFloat32x8 [a] x)
+       // result: (VRNDSCALEPS256 [a+9] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS256)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (FloorSuppressExceptionWithPrecisionFloat64x2 [a] x)
+       // result: (VRNDSCALEPD128 [a+9] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD128)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (FloorSuppressExceptionWithPrecisionFloat64x4 [a] x)
+       // result: (VRNDSCALEPD256 [a+9] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD256)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpFloorSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (FloorSuppressExceptionWithPrecisionFloat64x8 [a] x)
+       // result: (VRNDSCALEPD512 [a+9] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD512)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpFloorWithPrecisionFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (FloorWithPrecisionFloat32x16 [a] x)
+       // result: (VRNDSCALEPS512 [a+1] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS512)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpFloorWithPrecisionFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (FloorWithPrecisionFloat32x4 [a] x)
+       // result: (VRNDSCALEPS128 [a+1] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS128)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpFloorWithPrecisionFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (FloorWithPrecisionFloat32x8 [a] x)
+       // result: (VRNDSCALEPS256 [a+1] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS256)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpFloorWithPrecisionFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (FloorWithPrecisionFloat64x2 [a] x)
+       // result: (VRNDSCALEPD128 [a+1] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD128)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpFloorWithPrecisionFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (FloorWithPrecisionFloat64x4 [a] x)
+       // result: (VRNDSCALEPD256 [a+1] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD256)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpFloorWithPrecisionFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (FloorWithPrecisionFloat64x8 [a] x)
+       // result: (VRNDSCALEPD512 [a+1] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD512)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueAMD64_OpGetG(v *Value) bool {
        v_0 := v.Args[0]
        // match: (GetG mem)
@@ -33790,6 +35250,1086 @@ func rewriteValueAMD64_OpMaskedAverageUint8x64(v *Value) bool {
                return true
        }
 }
+func rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedCeilSuppressExceptionWithPrecisionFloat32x16 [a] x mask)
+       // result: (VRNDSCALEPSMasked512 [a+10] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked512)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedCeilSuppressExceptionWithPrecisionFloat32x4 [a] x mask)
+       // result: (VRNDSCALEPSMasked128 [a+10] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked128)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedCeilSuppressExceptionWithPrecisionFloat32x8 [a] x mask)
+       // result: (VRNDSCALEPSMasked256 [a+10] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked256)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedCeilSuppressExceptionWithPrecisionFloat64x2 [a] x mask)
+       // result: (VRNDSCALEPDMasked128 [a+10] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked128)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedCeilSuppressExceptionWithPrecisionFloat64x4 [a] x mask)
+       // result: (VRNDSCALEPDMasked256 [a+10] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked256)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedCeilSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedCeilSuppressExceptionWithPrecisionFloat64x8 [a] x mask)
+       // result: (VRNDSCALEPDMasked512 [a+10] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked512)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedCeilWithPrecisionFloat32x16 [a] x mask)
+       // result: (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked512)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedCeilWithPrecisionFloat32x4 [a] x mask)
+       // result: (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked128)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedCeilWithPrecisionFloat32x8 [a] x mask)
+       // result: (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked256)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedCeilWithPrecisionFloat64x2 [a] x mask)
+       // result: (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked128)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedCeilWithPrecisionFloat64x4 [a] x mask)
+       // result: (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked256)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedCeilWithPrecisionFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedCeilWithPrecisionFloat64x8 [a] x mask)
+       // result: (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked512)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
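// Editorial sketch, not part of this CL: the ceil rules above lower to
// VRNDSCALEP[SD]Masked* by adding a constant to the precision-carrying
// immediate. Assuming the Intel SDM encoding of VRNDSCALE's imm8
// (imm8[7:4] = number of fraction bits kept, imm8[3] = suppress
// precision exceptions, imm8[1:0] = rounding mode), [a+2] selects
// round-up and [a+10] selects round-up with exceptions suppressed.
// The helper below is illustrative only; it is not in the compiler.
package main

import "fmt"

func rndscaleImm8(prec, mode uint8, suppress bool) uint8 {
	imm := prec<<4 | mode&3 // high nibble: precision; low two bits: mode
	if suppress {
		imm |= 1 << 3 // bit 3: suppress precision exceptions
	}
	return imm
}

func main() {
	fmt.Println(rndscaleImm8(0, 2, false)) // 2, matching [a+2] for ceil
	fmt.Println(rndscaleImm8(0, 2, true))  // 10, matching [a+10]
}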
+func rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16 [a] x mask)
+       // result: (VREDUCEPSMasked512 [a+10] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked512)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4 [a] x mask)
+       // result: (VREDUCEPSMasked128 [a+10] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked128)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8 [a] x mask)
+       // result: (VREDUCEPSMasked256 [a+10] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked256)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2 [a] x mask)
+       // result: (VREDUCEPDMasked128 [a+10] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked128)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4 [a] x mask)
+       // result: (VREDUCEPDMasked256 [a+10] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked256)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8 [a] x mask)
+       // result: (VREDUCEPDMasked512 [a+10] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked512)
+               v.AuxInt = int8ToAuxInt(a + 10)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithCeilWithPrecisionFloat32x16 [a] x mask)
+       // result: (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked512)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithCeilWithPrecisionFloat32x4 [a] x mask)
+       // result: (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked128)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithCeilWithPrecisionFloat32x8 [a] x mask)
+       // result: (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked256)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithCeilWithPrecisionFloat64x2 [a] x mask)
+       // result: (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked128)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithCeilWithPrecisionFloat64x4 [a] x mask)
+       // result: (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked256)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithCeilWithPrecisionFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithCeilWithPrecisionFloat64x8 [a] x mask)
+       // result: (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked512)
+               v.AuxInt = int8ToAuxInt(a + 2)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16 [a] x mask)
+       // result: (VREDUCEPSMasked512 [a+9] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked512)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4 [a] x mask)
+       // result: (VREDUCEPSMasked128 [a+9] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked128)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8 [a] x mask)
+       // result: (VREDUCEPSMasked256 [a+9] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked256)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2 [a] x mask)
+       // result: (VREDUCEPDMasked128 [a+9] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked128)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4 [a] x mask)
+       // result: (VREDUCEPDMasked256 [a+9] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked256)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8 [a] x mask)
+       // result: (VREDUCEPDMasked512 [a+9] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked512)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithFloorWithPrecisionFloat32x16 [a] x mask)
+       // result: (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked512)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithFloorWithPrecisionFloat32x4 [a] x mask)
+       // result: (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked128)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithFloorWithPrecisionFloat32x8 [a] x mask)
+       // result: (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked256)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithFloorWithPrecisionFloat64x2 [a] x mask)
+       // result: (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked128)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithFloorWithPrecisionFloat64x4 [a] x mask)
+       // result: (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked256)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithFloorWithPrecisionFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithFloorWithPrecisionFloat64x8 [a] x mask)
+       // result: (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked512)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16 [a] x mask)
+       // result: (VREDUCEPSMasked512 [a+8] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked512)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4 [a] x mask)
+       // result: (VREDUCEPSMasked128 [a+8] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked128)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8 [a] x mask)
+       // result: (VREDUCEPSMasked256 [a+8] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked256)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2 [a] x mask)
+       // result: (VREDUCEPDMasked128 [a+8] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked128)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4 [a] x mask)
+       // result: (VREDUCEPDMasked256 [a+8] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked256)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8 [a] x mask)
+       // result: (VREDUCEPDMasked512 [a+8] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked512)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithRoundWithPrecisionFloat32x16 [a] x mask)
+       // result: (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked512)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithRoundWithPrecisionFloat32x4 [a] x mask)
+       // result: (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked128)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithRoundWithPrecisionFloat32x8 [a] x mask)
+       // result: (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked256)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithRoundWithPrecisionFloat64x2 [a] x mask)
+       // result: (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked128)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithRoundWithPrecisionFloat64x4 [a] x mask)
+       // result: (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked256)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithRoundWithPrecisionFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithRoundWithPrecisionFloat64x8 [a] x mask)
+       // result: (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked512)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16 [a] x mask)
+       // result: (VREDUCEPSMasked512 [a+11] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked512)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4 [a] x mask)
+       // result: (VREDUCEPSMasked128 [a+11] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked128)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8 [a] x mask)
+       // result: (VREDUCEPSMasked256 [a+11] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked256)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2 [a] x mask)
+       // result: (VREDUCEPDMasked128 [a+11] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked128)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4 [a] x mask)
+       // result: (VREDUCEPDMasked256 [a+11] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked256)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8 [a] x mask)
+       // result: (VREDUCEPDMasked512 [a+11] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked512)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithTruncWithPrecisionFloat32x16 [a] x mask)
+       // result: (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked512)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithTruncWithPrecisionFloat32x4 [a] x mask)
+       // result: (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked128)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithTruncWithPrecisionFloat32x8 [a] x mask)
+       // result: (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPSMasked256)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithTruncWithPrecisionFloat64x2 [a] x mask)
+       // result: (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked128)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithTruncWithPrecisionFloat64x4 [a] x mask)
+       // result: (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked256)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedDiffWithTruncWithPrecisionFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedDiffWithTruncWithPrecisionFloat64x8 [a] x mask)
+       // result: (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VREDUCEPDMasked512)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
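// Editorial note, not part of this CL: the DiffWith* rules target
// VREDUCEP[SD]Masked*, which computes x - round(x) at the requested
// precision, so these intrinsics reuse the same immediate layout as
// the RNDSCALE rules. The low-nibble offsets observed across this
// hunk, assuming the SDM imm8 encoding:
var rcOffset = map[string]int8{
	"Round": 0, // round to nearest even
	"Floor": 1, // round toward -inf
	"Ceil":  2, // round toward +inf
	"Trunc": 3, // round toward zero
}

const suppressExceptions int8 = 8 // imm8 bit 3

// For example, the DiffWithCeilSuppressException rules use
// rcOffset["Ceil"]+suppressExceptions == 10, while the plain
// DiffWithRound rules use rcOffset["Round"] == 0, hence [a+0].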
 func rewriteValueAMD64_OpMaskedDivFloat32x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
@@ -34546,6 +37086,222 @@ func rewriteValueAMD64_OpMaskedEqualUint8x64(v *Value) bool {
                return true
        }
 }
+func rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedFloorSuppressExceptionWithPrecisionFloat32x16 [a] x mask)
+       // result: (VRNDSCALEPSMasked512 [a+9] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked512)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedFloorSuppressExceptionWithPrecisionFloat32x4 [a] x mask)
+       // result: (VRNDSCALEPSMasked128 [a+9] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked128)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedFloorSuppressExceptionWithPrecisionFloat32x8 [a] x mask)
+       // result: (VRNDSCALEPSMasked256 [a+9] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked256)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedFloorSuppressExceptionWithPrecisionFloat64x2 [a] x mask)
+       // result: (VRNDSCALEPDMasked128 [a+9] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked128)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedFloorSuppressExceptionWithPrecisionFloat64x4 [a] x mask)
+       // result: (VRNDSCALEPDMasked256 [a+9] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked256)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedFloorSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedFloorSuppressExceptionWithPrecisionFloat64x8 [a] x mask)
+       // result: (VRNDSCALEPDMasked512 [a+9] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked512)
+               v.AuxInt = int8ToAuxInt(a + 9)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedFloorWithPrecisionFloat32x16 [a] x mask)
+       // result: (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked512)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedFloorWithPrecisionFloat32x4 [a] x mask)
+       // result: (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked128)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedFloorWithPrecisionFloat32x8 [a] x mask)
+       // result: (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked256)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedFloorWithPrecisionFloat64x2 [a] x mask)
+       // result: (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked128)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedFloorWithPrecisionFloat64x4 [a] x mask)
+       // result: (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked256)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedFloorWithPrecisionFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedFloorWithPrecisionFloat64x8 [a] x mask)
+       // result: (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked512)
+               v.AuxInt = int8ToAuxInt(a + 1)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
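// Editorial note, not part of this CL: each floating-point shape maps
// one-to-one onto a vector width, which is why every intrinsic above
// appears in six variants. Illustrative table only:
//
//	Float32x4 / Float64x2  -> 128-bit (e.g. VRNDSCALEP[SD]Masked128)
//	Float32x8 / Float64x4  -> 256-bit (e.g. VRNDSCALEP[SD]Masked256)
//	Float32x16 / Float64x8 -> 512-bit (e.g. VRNDSCALEP[SD]Masked512)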
 func rewriteValueAMD64_OpMaskedGreaterEqualFloat32x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
@@ -40348,6 +43104,222 @@ func rewriteValueAMD64_OpMaskedPopCountUint8x64(v *Value) bool {
                return true
        }
 }
+func rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedRoundSuppressExceptionWithPrecisionFloat32x16 [a] x mask)
+       // result: (VRNDSCALEPSMasked512 [a+8] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked512)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedRoundSuppressExceptionWithPrecisionFloat32x4 [a] x mask)
+       // result: (VRNDSCALEPSMasked128 [a+8] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked128)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedRoundSuppressExceptionWithPrecisionFloat32x8 [a] x mask)
+       // result: (VRNDSCALEPSMasked256 [a+8] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked256)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedRoundSuppressExceptionWithPrecisionFloat64x2 [a] x mask)
+       // result: (VRNDSCALEPDMasked128 [a+8] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked128)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedRoundSuppressExceptionWithPrecisionFloat64x4 [a] x mask)
+       // result: (VRNDSCALEPDMasked256 [a+8] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked256)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedRoundSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedRoundSuppressExceptionWithPrecisionFloat64x8 [a] x mask)
+       // result: (VRNDSCALEPDMasked512 [a+8] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked512)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedRoundWithPrecisionFloat32x16 [a] x mask)
+       // result: (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked512)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedRoundWithPrecisionFloat32x4 [a] x mask)
+       // result: (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked128)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedRoundWithPrecisionFloat32x8 [a] x mask)
+       // result: (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked256)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedRoundWithPrecisionFloat64x2 [a] x mask)
+       // result: (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked128)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedRoundWithPrecisionFloat64x4 [a] x mask)
+       // result: (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked256)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedRoundWithPrecisionFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedRoundWithPrecisionFloat64x8 [a] x mask)
+       // result: (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked512)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
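// Editorial sketch, not part of this CL: every masked rule in this
// hunk instantiates one shape -- convert the vector-typed mask to a K
// register with VPMOVVec{W}x{N}ToM, fold the rounding-mode bits into
// the immediate, and emit the masked instruction. A hypothetical
// factoring (names are illustrative; the generator emits the expanded
// form seen above):
func lowerMaskedRounding(v *Value, hwOp, toMaskOp Op, offset int8) bool {
	x, mask := v.Args[0], v.Args[1]
	a := auxIntToInt8(v.AuxInt)
	v.reset(hwOp)                       // e.g. OpAMD64VRNDSCALEPSMasked512
	v.AuxInt = int8ToAuxInt(a + offset) // e.g. +8 for round with SPE set
	m := v.Block.NewValue0(v.Pos, toMaskOp, types.TypeMask)
	m.AddArg(mask) // vector mask -> AVX-512 K mask
	v.AddArg2(x, m)
	return true
}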
 func rewriteValueAMD64_OpMaskedSaturatedAddInt16x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
@@ -41416,6 +44388,222 @@ func rewriteValueAMD64_OpMaskedSubUint8x64(v *Value) bool {
                return true
        }
 }
+func rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedTruncSuppressExceptionWithPrecisionFloat32x16 [a] x mask)
+       // result: (VRNDSCALEPSMasked512 [a+11] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked512)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedTruncSuppressExceptionWithPrecisionFloat32x4 [a] x mask)
+       // result: (VRNDSCALEPSMasked128 [a+11] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked128)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedTruncSuppressExceptionWithPrecisionFloat32x8 [a] x mask)
+       // result: (VRNDSCALEPSMasked256 [a+11] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked256)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedTruncSuppressExceptionWithPrecisionFloat64x2 [a] x mask)
+       // result: (VRNDSCALEPDMasked128 [a+11] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked128)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedTruncSuppressExceptionWithPrecisionFloat64x4 [a] x mask)
+       // result: (VRNDSCALEPDMasked256 [a+11] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked256)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedTruncSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedTruncSuppressExceptionWithPrecisionFloat64x8 [a] x mask)
+       // result: (VRNDSCALEPDMasked512 [a+11] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked512)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedTruncWithPrecisionFloat32x16 [a] x mask)
+       // result: (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked512)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedTruncWithPrecisionFloat32x4 [a] x mask)
+       // result: (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked128)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedTruncWithPrecisionFloat32x8 [a] x mask)
+       // result: (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPSMasked256)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedTruncWithPrecisionFloat64x2 [a] x mask)
+       // result: (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked128)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedTruncWithPrecisionFloat64x4 [a] x mask)
+       // result: (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked256)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpMaskedTruncWithPrecisionFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (MaskedTruncWithPrecisionFloat64x8 [a] x mask)
+       // result: (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               mask := v_1
+               v.reset(OpAMD64VRNDSCALEPDMasked512)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg2(x, v0)
+               return true
+       }
+}
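All of the masked rules above share one shape: read the immediate, convert the vector-typed mask operand into an AVX-512 k register via the matching VPMOVVec*ToM op, then re-emit the value as the masked instruction with the adjusted immediate. A hypothetical condensation of that template (not code in this CL; the generator expands one copy per op):

// rewriteMaskedImm8 condenses the generated pattern
// (MaskedOp [a] x mask) => (machineOp [a+delta] x (toMaskOp <types.TypeMask> mask)).
func rewriteMaskedImm8(v *Value, machineOp, toMaskOp Op, delta int8) bool {
	a := auxIntToInt8(v.AuxInt)
	x, mask := v.Args[0], v.Args[1]
	v.reset(machineOp)
	v.AuxInt = int8ToAuxInt(a + delta)
	m := v.Block.NewValue0(v.Pos, toMaskOp, types.TypeMask)
	m.AddArg(mask)
	v.AddArg2(x, m)
	return true
}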
 func rewriteValueAMD64_OpMaskedXorFloat32x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
@@ -43218,6 +46406,132 @@ func rewriteValueAMD64_OpPopCount8(v *Value) bool {
                return true
        }
 }
+func rewriteValueAMD64_OpRoundFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (RoundFloat32x4 x)
+       // result: (VROUNDPS128 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VROUNDPS128)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpRoundFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (RoundFloat32x8 x)
+       // result: (VROUNDPS256 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VROUNDPS256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpRoundFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (RoundFloat64x2 x)
+       // result: (VROUNDPD128 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VROUNDPD128)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpRoundFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (RoundFloat64x4 x)
+       // result: (VROUNDPD256 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VROUNDPD256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (RoundSuppressExceptionWithPrecisionFloat32x16 [a] x)
+       // result: (VRNDSCALEPS512 [a+8] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS512)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (RoundSuppressExceptionWithPrecisionFloat32x4 [a] x)
+       // result: (VRNDSCALEPS128 [a+8] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS128)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (RoundSuppressExceptionWithPrecisionFloat32x8 [a] x)
+       // result: (VRNDSCALEPS256 [a+8] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS256)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (RoundSuppressExceptionWithPrecisionFloat64x2 [a] x)
+       // result: (VRNDSCALEPD128 [a+8] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD128)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (RoundSuppressExceptionWithPrecisionFloat64x4 [a] x)
+       // result: (VRNDSCALEPD256 [a+8] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD256)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpRoundSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (RoundSuppressExceptionWithPrecisionFloat64x8 [a] x)
+       // result: (VRNDSCALEPD512 [a+8] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD512)
+               v.AuxInt = int8ToAuxInt(a + 8)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueAMD64_OpRoundToEven(v *Value) bool {
        v_0 := v.Args[0]
        // match: (RoundToEven x)
@@ -43230,6 +46544,84 @@ func rewriteValueAMD64_OpRoundToEven(v *Value) bool {
                return true
        }
 }
+func rewriteValueAMD64_OpRoundWithPrecisionFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (RoundWithPrecisionFloat32x16 [a] x)
+       // result: (VRNDSCALEPS512 [a+0] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS512)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpRoundWithPrecisionFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (RoundWithPrecisionFloat32x4 [a] x)
+       // result: (VRNDSCALEPS128 [a+0] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS128)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpRoundWithPrecisionFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (RoundWithPrecisionFloat32x8 [a] x)
+       // result: (VRNDSCALEPS256 [a+0] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS256)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpRoundWithPrecisionFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (RoundWithPrecisionFloat64x2 [a] x)
+       // result: (VRNDSCALEPD128 [a+0] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD128)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpRoundWithPrecisionFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (RoundWithPrecisionFloat64x4 [a] x)
+       // result: (VRNDSCALEPD256 [a+0] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD256)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpRoundWithPrecisionFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (RoundWithPrecisionFloat64x8 [a] x)
+       // result: (VRNDSCALEPD512 [a+0] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD512)
+               v.AuxInt = int8ToAuxInt(a + 0)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueAMD64_OpRsh16Ux16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -45190,6 +48582,210 @@ func rewriteValueAMD64_OpTrunc(v *Value) bool {
                return true
        }
 }
+func rewriteValueAMD64_OpTruncFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (TruncFloat32x4 x)
+       // result: (VROUNDPS128 [3] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VROUNDPS128)
+               v.AuxInt = int8ToAuxInt(3)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpTruncFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (TruncFloat32x8 x)
+       // result: (VROUNDPS256 [3] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VROUNDPS256)
+               v.AuxInt = int8ToAuxInt(3)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpTruncFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (TruncFloat64x2 x)
+       // result: (VROUNDPD128 [3] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VROUNDPD128)
+               v.AuxInt = int8ToAuxInt(3)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpTruncFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (TruncFloat64x4 x)
+       // result: (VROUNDPD256 [3] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VROUNDPD256)
+               v.AuxInt = int8ToAuxInt(3)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (TruncSuppressExceptionWithPrecisionFloat32x16 [a] x)
+       // result: (VRNDSCALEPS512 [a+11] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS512)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (TruncSuppressExceptionWithPrecisionFloat32x4 [a] x)
+       // result: (VRNDSCALEPS128 [a+11] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS128)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (TruncSuppressExceptionWithPrecisionFloat32x8 [a] x)
+       // result: (VRNDSCALEPS256 [a+11] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS256)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (TruncSuppressExceptionWithPrecisionFloat64x2 [a] x)
+       // result: (VRNDSCALEPD128 [a+11] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD128)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (TruncSuppressExceptionWithPrecisionFloat64x4 [a] x)
+       // result: (VRNDSCALEPD256 [a+11] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD256)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpTruncSuppressExceptionWithPrecisionFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (TruncSuppressExceptionWithPrecisionFloat64x8 [a] x)
+       // result: (VRNDSCALEPD512 [a+11] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD512)
+               v.AuxInt = int8ToAuxInt(a + 11)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpTruncWithPrecisionFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (TruncWithPrecisionFloat32x16 [a] x)
+       // result: (VRNDSCALEPS512 [a+3] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS512)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpTruncWithPrecisionFloat32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (TruncWithPrecisionFloat32x4 [a] x)
+       // result: (VRNDSCALEPS128 [a+3] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS128)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpTruncWithPrecisionFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (TruncWithPrecisionFloat32x8 [a] x)
+       // result: (VRNDSCALEPS256 [a+3] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPS256)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpTruncWithPrecisionFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (TruncWithPrecisionFloat64x2 [a] x)
+       // result: (VRNDSCALEPD128 [a+3] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD128)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpTruncWithPrecisionFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (TruncWithPrecisionFloat64x4 [a] x)
+       // result: (VRNDSCALEPD256 [a+3] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD256)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpTruncWithPrecisionFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (TruncWithPrecisionFloat64x8 [a] x)
+       // result: (VRNDSCALEPD512 [a+3] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VRNDSCALEPD512)
+               v.AuxInt = int8ToAuxInt(a + 3)
+               v.AddArg(x)
+               return true
+       }
+}
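Per the Intel SDM, each VRNDSCALE lane computes 2^-M * round(2^M * x), so TruncWithPrecision keeps M fraction bits and truncates the rest. A scalar model of one lane:

package main

import (
	"fmt"
	"math"
)

// rndscaleTrunc models one VRNDSCALEP[SD] lane with mode 3 (truncate):
// result = 2^-M * trunc(2^M * x).
func rndscaleTrunc(x float64, m int) float64 {
	s := math.Ldexp(1, m) // 2^M
	return math.Trunc(x*s) / s
}

func main() {
	// TruncWithPrecision(2): 1.875 (binary 1.111) -> 1.75 (binary 1.11).
	fmt.Println(rndscaleTrunc(1.875, 2))
}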
 func rewriteValueAMD64_OpZero(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
index 3c8104ec2c033ca52ff62fdc47afbf3beab2dff4..d05d0e2066f7f6c3a7bd50cae3a62f59ff6a5a9d 100644 (file)
@@ -16,16 +16,32 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float32x16.Sqrt", opLen1(ssa.OpSqrtFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x4.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x4.Floor", opLen1(ssa.OpFloorFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x4.Round", opLen1(ssa.OpRoundFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x4.Sqrt", opLen1(ssa.OpSqrtFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x4.Trunc", opLen1(ssa.OpTruncFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x8.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x8.Floor", opLen1(ssa.OpFloorFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x8.Round", opLen1(ssa.OpRoundFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x8.Sqrt", opLen1(ssa.OpSqrtFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x8.Trunc", opLen1(ssa.OpTruncFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x2.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x2.Floor", opLen1(ssa.OpFloorFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x2.Round", opLen1(ssa.OpRoundFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x2.Sqrt", opLen1(ssa.OpSqrtFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x2.Trunc", opLen1(ssa.OpTruncFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x4.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x4.Ceil", opLen1(ssa.OpCeilFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x4.Floor", opLen1(ssa.OpFloorFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x4.Round", opLen1(ssa.OpRoundFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x4.Sqrt", opLen1(ssa.OpSqrtFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x4.Trunc", opLen1(ssa.OpTruncFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x8.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x8.Sqrt", opLen1(ssa.OpSqrtFloat64x8, types.TypeVec512), sys.AMD64)
@@ -87,6 +103,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float32x16.Sub", opLen2(ssa.OpSubFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x16.Xor", opLen2(ssa.OpXorFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Add", opLen2(ssa.OpAddFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x4.AddSub", opLen2(ssa.OpAddSubFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x4.And", opLen2(ssa.OpAndFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x4.AndNot", opLen2(ssa.OpAndNotFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x4.Div", opLen2(ssa.OpDivFloat32x4, types.TypeVec128), sys.AMD64)
@@ -110,6 +127,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float32x4.Sub", opLen2(ssa.OpSubFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x4.Xor", opLen2(ssa.OpXorFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Add", opLen2(ssa.OpAddFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x8.AddSub", opLen2(ssa.OpAddSubFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x8.And", opLen2(ssa.OpAndFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x8.AndNot", opLen2(ssa.OpAndNotFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x8.Div", opLen2(ssa.OpDivFloat32x8, types.TypeVec256), sys.AMD64)
@@ -133,6 +151,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float32x8.Sub", opLen2(ssa.OpSubFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x8.Xor", opLen2(ssa.OpXorFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.Add", opLen2(ssa.OpAddFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x2.AddSub", opLen2(ssa.OpAddSubFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x2.And", opLen2(ssa.OpAndFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x2.AndNot", opLen2(ssa.OpAndNotFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x2.Div", opLen2(ssa.OpDivFloat64x2, types.TypeVec128), sys.AMD64)
@@ -156,6 +175,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.Sub", opLen2(ssa.OpSubFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x2.Xor", opLen2(ssa.OpXorFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.Add", opLen2(ssa.OpAddFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x4.AddSub", opLen2(ssa.OpAddSubFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x4.And", opLen2(ssa.OpAndFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x4.AndNot", opLen2(ssa.OpAndNotFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x4.Div", opLen2(ssa.OpDivFloat64x4, types.TypeVec256), sys.AMD64)
@@ -1083,6 +1103,198 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint8x64.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint8x64, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint8x64.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint8x64, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint8x64.MaskedSub", opLen3(ssa.OpMaskedSubUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x16.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float32x16.AsFloat64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
        addF(simdPackage, "Float32x16.AsInt16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
        addF(simdPackage, "Float32x16.AsInt32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
index 5dfb49cf2d5f590e08b60640a7f57515d37cdc95..d433b67c9aefc469aa8873c561a5a381792f83db 100644 (file)
@@ -19,36 +19,84 @@ func (x Float32x4) ApproximateReciprocal() Float32x4
 // Asm: VRSQRTPS, CPU Feature: AVX
 func (x Float32x4) ApproximateReciprocalOfSqrt() Float32x4
 
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x4) Ceil() Float32x4
+
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x4) Floor() Float32x4
+
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x4) Round() Float32x4
+
 // Asm: VSQRTPS, CPU Feature: AVX
 func (x Float32x4) Sqrt() Float32x4
 
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x4) Trunc() Float32x4
+
 // Asm: VRCP14PS, CPU Feature: AVX512EVEX
 func (x Float32x8) ApproximateReciprocal() Float32x8
 
 // Asm: VRSQRTPS, CPU Feature: AVX
 func (x Float32x8) ApproximateReciprocalOfSqrt() Float32x8
 
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x8) Ceil() Float32x8
+
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x8) Floor() Float32x8
+
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x8) Round() Float32x8
+
 // Asm: VSQRTPS, CPU Feature: AVX
 func (x Float32x8) Sqrt() Float32x8
 
+// Asm: VROUNDPS, CPU Feature: AVX
+func (x Float32x8) Trunc() Float32x8
+
 // Asm: VRCP14PD, CPU Feature: AVX512EVEX
 func (x Float64x2) ApproximateReciprocal() Float64x2
 
 // Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
 func (x Float64x2) ApproximateReciprocalOfSqrt() Float64x2
 
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x2) Ceil() Float64x2
+
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x2) Floor() Float64x2
+
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x2) Round() Float64x2
+
 // Asm: VSQRTPD, CPU Feature: AVX
 func (x Float64x2) Sqrt() Float64x2
 
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x2) Trunc() Float64x2
+
 // Asm: VRCP14PD, CPU Feature: AVX512EVEX
 func (x Float64x4) ApproximateReciprocal() Float64x4
 
 // Asm: VRSQRT14PD, CPU Feature: AVX512EVEX
 func (x Float64x4) ApproximateReciprocalOfSqrt() Float64x4
 
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x4) Ceil() Float64x4
+
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x4) Floor() Float64x4
+
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x4) Round() Float64x4
+
 // Asm: VSQRTPD, CPU Feature: AVX
 func (x Float64x4) Sqrt() Float64x4
 
+// Asm: VROUNDPD, CPU Feature: AVX
+func (x Float64x4) Trunc() Float64x4
+
 // Asm: VRCP14PD, CPU Feature: AVX512EVEX
 func (x Float64x8) ApproximateReciprocal() Float64x8
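All four of the new whole-number helpers lower to a single VROUNDPS/VROUNDPD that differs only in its rounding-control immediate. A minimal sketch, continuing the import assumption above and using only methods declared in this hunk:

// roundAll applies each rounding mode to the same vector.
func roundAll(x simd.Float32x4) (c, f, r, t simd.Float32x4) {
	c = x.Ceil()  // toward +Inf
	f = x.Floor() // toward -Inf
	r = x.Round() // to nearest, per the immediate the compiler selects
	t = x.Trunc() // toward zero
	return
}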
 
@@ -246,6 +294,9 @@ func (x Float32x16) Xor(y Float32x16) Float32x16
 // Asm: VADDPS, CPU Feature: AVX
 func (x Float32x4) Add(y Float32x4) Float32x4
 
+// Asm: VADDSUBPS, CPU Feature: AVX
+func (x Float32x4) AddSub(y Float32x4) Float32x4
+
 // Asm: VANDPS, CPU Feature: AVX
 func (x Float32x4) And(y Float32x4) Float32x4
 
@@ -333,6 +384,9 @@ func (x Float32x4) Xor(y Float32x4) Float32x4
 // Asm: VADDPS, CPU Feature: AVX
 func (x Float32x8) Add(y Float32x8) Float32x8
 
+// Asm: VADDSUBPS, CPU Feature: AVX
+func (x Float32x8) AddSub(y Float32x8) Float32x8
+
 // Asm: VANDPS, CPU Feature: AVX
 func (x Float32x8) And(y Float32x8) Float32x8
 
@@ -420,6 +474,9 @@ func (x Float32x8) Xor(y Float32x8) Float32x8
 // Asm: VADDPD, CPU Feature: AVX
 func (x Float64x2) Add(y Float64x2) Float64x2
 
+// Asm: VADDSUBPD, CPU Feature: AVX
+func (x Float64x2) AddSub(y Float64x2) Float64x2
+
 // Asm: VANDPD, CPU Feature: AVX
 func (x Float64x2) And(y Float64x2) Float64x2
 
@@ -507,6 +564,9 @@ func (x Float64x2) Xor(y Float64x2) Float64x2
 // Asm: VADDPD, CPU Feature: AVX
 func (x Float64x4) Add(y Float64x4) Float64x4
 
+// Asm: VADDSUBPD, CPU Feature: AVX
+func (x Float64x4) AddSub(y Float64x4) Float64x4
+
 // Asm: VANDPD, CPU Feature: AVX
 func (x Float64x4) And(y Float64x4) Float64x4
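AddSub lowers to VADDSUBPS/VADDSUBPD, which subtracts in the even-indexed lanes and adds in the odd-indexed ones, the usual building block for interleaved complex arithmetic. A sketch of the lane pattern, under the same assumptions as above:

// mixedAdd: for x = {x0,x1,x2,x3} and y = {y0,y1,y2,y3},
// x.AddSub(y) = {x0-y0, x1+y1, x2-y2, x3+y3}.
func mixedAdd(x, y simd.Float32x4) simd.Float32x4 {
	return x.AddSub(y)
}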
 
@@ -4112,6 +4172,582 @@ func (x Uint8x64) MaskedSaturatedSub(y Uint8x64, z Mask8x64) Uint8x64
 // Asm: VPSUBB, CPU Feature: AVX512EVEX
 func (x Uint8x64) MaskedSub(y Uint8x64, z Mask8x64) Uint8x64
 
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) CeilSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) CeilSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) CeilWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) CeilWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) CeilWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) CeilWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) CeilWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) CeilWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithCeilSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithCeilWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithCeilWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithCeilWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithCeilWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithCeilWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithCeilWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithFloorSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithFloorWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithFloorWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithFloorWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithFloorWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithFloorWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithFloorWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithRoundSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithRoundWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithRoundWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithRoundWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithRoundWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithRoundWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithRoundWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithTruncSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) DiffWithTruncWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) DiffWithTruncWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) DiffWithTruncWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) DiffWithTruncWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) DiffWithTruncWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) DiffWithTruncWithPrecision(imm8 uint8) Float64x8
+
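The DiffWith* family maps to VREDUCEPS/VREDUCEPD, which returns the difference between each element and its value rounded at the requested precision, that is, the part rounding would discard. A hedged sketch, reading the result as x minus trunc(x) at 2^-imm8 granularity:

// fractionalPart extracts what integer truncation would drop
// (imm8 = 0 requests whole-number granularity).
func fractionalPart(x simd.Float64x4) simd.Float64x4 {
	return x.DiffWithTruncWithPrecision(0)
}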
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) FloorSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) FloorSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) FloorWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) FloorWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) FloorWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) FloorWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) FloorWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) FloorWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) RoundSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) RoundSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) RoundWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) RoundWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) RoundWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) RoundWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) RoundWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) RoundWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) TruncSuppressExceptionWithPrecision(imm8 uint8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) TruncWithPrecision(imm8 uint8) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) TruncWithPrecision(imm8 uint8) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) TruncWithPrecision(imm8 uint8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) TruncWithPrecision(imm8 uint8) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) TruncWithPrecision(imm8 uint8) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) TruncWithPrecision(imm8 uint8) Float64x8
+
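For the WithPrecision variants the immediate selects how many binary fraction bits survive: VRNDSCALE rounds each lane to a multiple of 2^-imm8, and the SuppressException forms presumably set the instruction's SAE bit so no floating-point exceptions are raised. A short sketch under that reading:

// quarterRound rounds every lane to the nearest multiple of 0.25
// (imm8 = 2); imm8 = 0 would recover plain integer rounding.
func quarterRound(x simd.Float32x8) simd.Float32x8 {
	return x.RoundWithPrecision(2)
}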
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedCeilWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedCeilWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedCeilWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedCeilWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedCeilWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedCeilWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithCeilSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithCeilWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithFloorWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithRoundWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VREDUCEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VREDUCEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedDiffWithTruncWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFloorSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedFloorWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedFloorWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedFloorWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedFloorWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedFloorWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedFloorWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedRoundSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedRoundWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedRoundWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedRoundWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedRoundWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedRoundWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedRoundWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedTruncSuppressExceptionWithPrecision(imm uint8, y Mask64x8) Float64x8
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x16) MaskedTruncWithPrecision(imm uint8, y Mask32x16) Float32x16
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x4) MaskedTruncWithPrecision(imm uint8, y Mask32x4) Float32x4
+
+// Asm: VRNDSCALEPS, CPU Feature: AVX512EVEX
+func (x Float32x8) MaskedTruncWithPrecision(imm uint8, y Mask32x8) Float32x8
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x2) MaskedTruncWithPrecision(imm uint8, y Mask64x2) Float64x2
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x4) MaskedTruncWithPrecision(imm uint8, y Mask64x4) Float64x4
+
+// Asm: VRNDSCALEPD, CPU Feature: AVX512EVEX
+func (x Float64x8) MaskedTruncWithPrecision(imm uint8, y Mask64x8) Float64x8
+
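Each Masked* stub threads a Mask type as its trailing operand; lanes left unset in the mask are not rounded, with merge or zeroing behavior following whatever the generated EVEX-masked instruction encodes. A final sketch, with mask construction again outside this CL:

// floorWhere floors only the lanes selected by m, to multiples
// of 0.5 (imm8 = 1).
func floorWhere(x simd.Float64x2, m simd.Mask64x2) simd.Float64x2 {
	return x.MaskedFloorWithPrecision(1, m)
}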
 // AsFloat64x8 converts from Float32x16 to Float64x8
 func (from Float32x16) AsFloat64x8() (to Float64x8)