Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] simd: make OpMasked machine ops only
authorJunyang Shao <shaojunyang@google.com>
Mon, 18 Aug 2025 21:13:00 +0000 (21:13 +0000)
committerJunyang Shao <shaojunyang@google.com>
Tue, 19 Aug 2025 20:46:58 +0000 (13:46 -0700)
Right now the `Op(...).Masked` idiom can be expected to lack many parts, which
would leave the API incomplete. To keep the API surface smaller, we are removing
these ops' frontend types and interfaces for now. Later, peephole rules and a new
pass that checks CPU-feature domination relations will allow these ops to be
selected for the right `Op(...).Masked` idiom.

Change-Id: I77f72a198b3d8b1880dcb911470db5e0089ac1ca
Reviewed-on: https://go-review.googlesource.com/c/go/+/697155
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Bypass: Junyang Shao <shaojunyang@google.com>

src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssagen/simdintrinsics.go
src/simd/_gen/simdgen/godefs.go
src/simd/compare_test.go
src/simd/ops_amd64.go
src/simd/simd_test.go

index d64f36cf74e9c2c112da04dfddce65fd0083759e..cfe0075986f80ebcb9c5b3de6e7acdeda52642c1 100644 (file)
 (AbsInt64x2 ...) => (VPABSQ128 ...)
 (AbsInt64x4 ...) => (VPABSQ256 ...)
 (AbsInt64x8 ...) => (VPABSQ512 ...)
-(AbsMaskedInt8x16 x mask) => (VPABSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(AbsMaskedInt8x32 x mask) => (VPABSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
-(AbsMaskedInt8x64 x mask) => (VPABSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
-(AbsMaskedInt16x8 x mask) => (VPABSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(AbsMaskedInt16x16 x mask) => (VPABSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(AbsMaskedInt16x32 x mask) => (VPABSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
-(AbsMaskedInt32x4 x mask) => (VPABSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AbsMaskedInt32x8 x mask) => (VPABSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AbsMaskedInt32x16 x mask) => (VPABSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AbsMaskedInt64x2 x mask) => (VPABSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(AbsMaskedInt64x4 x mask) => (VPABSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(AbsMaskedInt64x8 x mask) => (VPABSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (AddFloat32x4 ...) => (VADDPS128 ...)
 (AddFloat32x8 ...) => (VADDPS256 ...)
 (AddFloat32x16 ...) => (VADDPS512 ...)
 (AddDotProdPairsSaturatedInt32x4 ...) => (VPDPWSSDS128 ...)
 (AddDotProdPairsSaturatedInt32x8 ...) => (VPDPWSSDS256 ...)
 (AddDotProdPairsSaturatedInt32x16 ...) => (VPDPWSSDS512 ...)
-(AddDotProdPairsSaturatedMaskedInt32x4 x y z mask) => (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AddDotProdPairsSaturatedMaskedInt32x8 x y z mask) => (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AddDotProdPairsSaturatedMaskedInt32x16 x y z mask) => (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
 (AddDotProdQuadrupleInt32x4 ...) => (VPDPBUSD128 ...)
 (AddDotProdQuadrupleInt32x8 ...) => (VPDPBUSD256 ...)
 (AddDotProdQuadrupleInt32x16 ...) => (VPDPBUSD512 ...)
-(AddDotProdQuadrupleMaskedInt32x4 x y z mask) => (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AddDotProdQuadrupleMaskedInt32x8 x y z mask) => (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AddDotProdQuadrupleMaskedInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
 (AddDotProdQuadrupleSaturatedInt32x4 ...) => (VPDPBUSDS128 ...)
 (AddDotProdQuadrupleSaturatedInt32x8 ...) => (VPDPBUSDS256 ...)
 (AddDotProdQuadrupleSaturatedInt32x16 ...) => (VPDPBUSDS512 ...)
-(AddDotProdQuadrupleSaturatedMaskedInt32x4 x y z mask) => (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AddDotProdQuadrupleSaturatedMaskedInt32x8 x y z mask) => (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AddDotProdQuadrupleSaturatedMaskedInt32x16 x y z mask) => (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AddMaskedFloat32x4 x y mask) => (VADDPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AddMaskedFloat32x8 x y mask) => (VADDPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AddMaskedFloat32x16 x y mask) => (VADDPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AddMaskedFloat64x2 x y mask) => (VADDPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(AddMaskedFloat64x4 x y mask) => (VADDPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(AddMaskedFloat64x8 x y mask) => (VADDPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(AddMaskedInt8x16 x y mask) => (VPADDBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(AddMaskedInt8x32 x y mask) => (VPADDBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(AddMaskedInt8x64 x y mask) => (VPADDBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(AddMaskedInt16x8 x y mask) => (VPADDWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(AddMaskedInt16x16 x y mask) => (VPADDWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(AddMaskedInt16x32 x y mask) => (VPADDWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(AddMaskedInt32x4 x y mask) => (VPADDDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AddMaskedInt32x8 x y mask) => (VPADDDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AddMaskedInt32x16 x y mask) => (VPADDDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AddMaskedInt64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(AddMaskedInt64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(AddMaskedInt64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(AddMaskedUint8x16 x y mask) => (VPADDBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(AddMaskedUint8x32 x y mask) => (VPADDBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(AddMaskedUint8x64 x y mask) => (VPADDBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(AddMaskedUint16x8 x y mask) => (VPADDWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(AddMaskedUint16x16 x y mask) => (VPADDWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(AddMaskedUint16x32 x y mask) => (VPADDWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(AddMaskedUint32x4 x y mask) => (VPADDDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AddMaskedUint32x8 x y mask) => (VPADDDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AddMaskedUint32x16 x y mask) => (VPADDDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AddMaskedUint64x2 x y mask) => (VPADDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(AddMaskedUint64x4 x y mask) => (VPADDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(AddMaskedUint64x8 x y mask) => (VPADDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (AddPairsFloat32x4 ...) => (VHADDPS128 ...)
 (AddPairsFloat32x8 ...) => (VHADDPS256 ...)
 (AddPairsFloat64x2 ...) => (VHADDPD128 ...)
 (AddSaturatedUint16x8 ...) => (VPADDUSW128 ...)
 (AddSaturatedUint16x16 ...) => (VPADDUSW256 ...)
 (AddSaturatedUint16x32 ...) => (VPADDUSW512 ...)
-(AddSaturatedMaskedInt8x16 x y mask) => (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(AddSaturatedMaskedInt8x32 x y mask) => (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(AddSaturatedMaskedInt8x64 x y mask) => (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(AddSaturatedMaskedInt16x8 x y mask) => (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(AddSaturatedMaskedInt16x16 x y mask) => (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(AddSaturatedMaskedInt16x32 x y mask) => (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint8x16 x y mask) => (VPADDUSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint8x32 x y mask) => (VPADDUSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint8x64 x y mask) => (VPADDUSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint16x8 x y mask) => (VPADDUSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint16x16 x y mask) => (VPADDUSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(AddSaturatedMaskedUint16x32 x y mask) => (VPADDUSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
 (AddSubFloat32x4 ...) => (VADDSUBPS128 ...)
 (AddSubFloat32x8 ...) => (VADDSUBPS256 ...)
 (AddSubFloat64x2 ...) => (VADDSUBPD128 ...)
 (AndUint64x2 ...) => (VPAND128 ...)
 (AndUint64x4 ...) => (VPAND256 ...)
 (AndUint64x8 ...) => (VPANDQ512 ...)
-(AndMaskedInt32x4 x y mask) => (VPANDDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AndMaskedInt32x8 x y mask) => (VPANDDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AndMaskedInt32x16 x y mask) => (VPANDDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AndMaskedInt64x2 x y mask) => (VPANDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(AndMaskedInt64x4 x y mask) => (VPANDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(AndMaskedInt64x8 x y mask) => (VPANDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(AndMaskedUint32x4 x y mask) => (VPANDDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AndMaskedUint32x8 x y mask) => (VPANDDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AndMaskedUint32x16 x y mask) => (VPANDDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AndMaskedUint64x2 x y mask) => (VPANDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(AndMaskedUint64x4 x y mask) => (VPANDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(AndMaskedUint64x8 x y mask) => (VPANDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (AndNotInt8x16 ...) => (VPANDN128 ...)
 (AndNotInt8x32 ...) => (VPANDN256 ...)
 (AndNotInt8x64 ...) => (VPANDND512 ...)
 (AndNotUint64x2 ...) => (VPANDN128 ...)
 (AndNotUint64x4 ...) => (VPANDN256 ...)
 (AndNotUint64x8 ...) => (VPANDNQ512 ...)
-(AndNotMaskedInt32x4 x y mask) => (VPANDNDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AndNotMaskedInt32x8 x y mask) => (VPANDNDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AndNotMaskedInt32x16 x y mask) => (VPANDNDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AndNotMaskedInt64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(AndNotMaskedInt64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(AndNotMaskedInt64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(AndNotMaskedUint32x4 x y mask) => (VPANDNDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(AndNotMaskedUint32x8 x y mask) => (VPANDNDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(AndNotMaskedUint32x16 x y mask) => (VPANDNDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(AndNotMaskedUint64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(AndNotMaskedUint64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(AndNotMaskedUint64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (AverageUint8x16 ...) => (VPAVGB128 ...)
 (AverageUint8x32 ...) => (VPAVGB256 ...)
 (AverageUint8x64 ...) => (VPAVGB512 ...)
 (AverageUint16x8 ...) => (VPAVGW128 ...)
 (AverageUint16x16 ...) => (VPAVGW256 ...)
 (AverageUint16x32 ...) => (VPAVGW512 ...)
-(AverageMaskedUint8x16 x y mask) => (VPAVGBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(AverageMaskedUint8x32 x y mask) => (VPAVGBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(AverageMaskedUint8x64 x y mask) => (VPAVGBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(AverageMaskedUint16x8 x y mask) => (VPAVGWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(AverageMaskedUint16x16 x y mask) => (VPAVGWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(AverageMaskedUint16x32 x y mask) => (VPAVGWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
 (Broadcast128Float32x4 ...) => (VBROADCASTSS128 ...)
 (Broadcast128Float64x2 ...) => (VPBROADCASTQ128 ...)
 (Broadcast128Int8x16 ...) => (VPBROADCASTB128 ...)
 (Broadcast128Uint16x8 ...) => (VPBROADCASTW128 ...)
 (Broadcast128Uint32x4 ...) => (VPBROADCASTD128 ...)
 (Broadcast128Uint64x2 ...) => (VPBROADCASTQ128 ...)
-(Broadcast128MaskedFloat32x4 x mask) => (VBROADCASTSSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast128MaskedFloat64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Broadcast128MaskedInt8x16 x mask) => (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(Broadcast128MaskedInt16x8 x mask) => (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(Broadcast128MaskedInt32x4 x mask) => (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast128MaskedInt64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Broadcast128MaskedUint8x16 x mask) => (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(Broadcast128MaskedUint16x8 x mask) => (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(Broadcast128MaskedUint32x4 x mask) => (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast128MaskedUint64x2 x mask) => (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
 (Broadcast256Float32x4 ...) => (VBROADCASTSS256 ...)
 (Broadcast256Float64x2 ...) => (VBROADCASTSD256 ...)
 (Broadcast256Int8x16 ...) => (VPBROADCASTB256 ...)
 (Broadcast256Uint16x8 ...) => (VPBROADCASTW256 ...)
 (Broadcast256Uint32x4 ...) => (VPBROADCASTD256 ...)
 (Broadcast256Uint64x2 ...) => (VPBROADCASTQ256 ...)
-(Broadcast256MaskedFloat32x4 x mask) => (VBROADCASTSSMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast256MaskedFloat64x2 x mask) => (VBROADCASTSDMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Broadcast256MaskedInt8x16 x mask) => (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(Broadcast256MaskedInt16x8 x mask) => (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(Broadcast256MaskedInt32x4 x mask) => (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast256MaskedInt64x2 x mask) => (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Broadcast256MaskedUint8x16 x mask) => (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(Broadcast256MaskedUint16x8 x mask) => (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(Broadcast256MaskedUint32x4 x mask) => (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast256MaskedUint64x2 x mask) => (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
 (Broadcast512Float32x4 ...) => (VBROADCASTSS512 ...)
 (Broadcast512Float64x2 ...) => (VBROADCASTSD512 ...)
 (Broadcast512Int8x16 ...) => (VPBROADCASTB512 ...)
 (Broadcast512Uint16x8 ...) => (VPBROADCASTW512 ...)
 (Broadcast512Uint32x4 ...) => (VPBROADCASTD512 ...)
 (Broadcast512Uint64x2 ...) => (VPBROADCASTQ512 ...)
-(Broadcast512MaskedFloat32x4 x mask) => (VBROADCASTSSMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast512MaskedFloat64x2 x mask) => (VBROADCASTSDMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Broadcast512MaskedInt8x16 x mask) => (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(Broadcast512MaskedInt16x8 x mask) => (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(Broadcast512MaskedInt32x4 x mask) => (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast512MaskedInt64x2 x mask) => (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Broadcast512MaskedUint8x16 x mask) => (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(Broadcast512MaskedUint16x8 x mask) => (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(Broadcast512MaskedUint32x4 x mask) => (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Broadcast512MaskedUint64x2 x mask) => (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
 (CeilFloat32x4 x) => (VROUNDPS128 [2] x)
 (CeilFloat32x8 x) => (VROUNDPS256 [2] x)
 (CeilFloat64x2 x) => (VROUNDPD128 [2] x)
 (CeilScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+2] x)
 (CeilScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+2] x)
 (CeilScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+2] x)
-(CeilScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(CeilScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(CeilScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(CeilScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(CeilScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(CeilScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (CeilScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+2] x)
 (CeilScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+2] x)
 (CeilScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+2] x)
 (CeilScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+2] x)
 (CeilScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+2] x)
 (CeilScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+2] x)
-(CeilScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(CeilScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(CeilScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(CeilScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(CeilScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(CeilScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (CompressFloat32x4 x mask) => (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
 (CompressFloat32x8 x mask) => (VCOMPRESSPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
 (CompressFloat32x16 x mask) => (VCOMPRESSPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
 (ConvertToInt32Float32x4 ...) => (VCVTTPS2DQ128 ...)
 (ConvertToInt32Float32x8 ...) => (VCVTTPS2DQ256 ...)
 (ConvertToInt32Float32x16 ...) => (VCVTTPS2DQ512 ...)
-(ConvertToInt32MaskedFloat32x4 x mask) => (VCVTTPS2DQMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ConvertToInt32MaskedFloat32x8 x mask) => (VCVTTPS2DQMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ConvertToInt32MaskedFloat32x16 x mask) => (VCVTTPS2DQMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
 (ConvertToUint32Float32x4 ...) => (VCVTPS2UDQ128 ...)
 (ConvertToUint32Float32x8 ...) => (VCVTPS2UDQ256 ...)
 (ConvertToUint32Float32x16 ...) => (VCVTPS2UDQ512 ...)
-(ConvertToUint32MaskedFloat32x4 x mask) => (VCVTPS2UDQMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ConvertToUint32MaskedFloat32x8 x mask) => (VCVTPS2UDQMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ConvertToUint32MaskedFloat32x16 x mask) => (VCVTPS2UDQMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
 (CopySignInt8x16 ...) => (VPSIGNB128 ...)
 (CopySignInt8x32 ...) => (VPSIGNB256 ...)
 (CopySignInt16x8 ...) => (VPSIGNW128 ...)
 (DivFloat64x2 ...) => (VDIVPD128 ...)
 (DivFloat64x4 ...) => (VDIVPD256 ...)
 (DivFloat64x8 ...) => (VDIVPD512 ...)
-(DivMaskedFloat32x4 x y mask) => (VDIVPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(DivMaskedFloat32x8 x y mask) => (VDIVPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(DivMaskedFloat32x16 x y mask) => (VDIVPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(DivMaskedFloat64x2 x y mask) => (VDIVPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(DivMaskedFloat64x4 x y mask) => (VDIVPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(DivMaskedFloat64x8 x y mask) => (VDIVPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (DotProdPairsInt16x8 ...) => (VPMADDWD128 ...)
 (DotProdPairsInt16x16 ...) => (VPMADDWD256 ...)
 (DotProdPairsInt16x32 ...) => (VPMADDWD512 ...)
-(DotProdPairsMaskedInt16x8 x y mask) => (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(DotProdPairsMaskedInt16x16 x y mask) => (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(DotProdPairsMaskedInt16x32 x y mask) => (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
 (DotProdPairsSaturatedUint8x16 ...) => (VPMADDUBSW128 ...)
 (DotProdPairsSaturatedUint8x32 ...) => (VPMADDUBSW256 ...)
 (DotProdPairsSaturatedUint8x64 ...) => (VPMADDUBSW512 ...)
-(DotProdPairsSaturatedMaskedUint8x16 x y mask) => (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(DotProdPairsSaturatedMaskedUint8x32 x y mask) => (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(DotProdPairsSaturatedMaskedUint8x64 x y mask) => (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
 (EqualFloat32x4 x y) => (VCMPPS128 [0] x y)
 (EqualFloat32x8 x y) => (VCMPPS256 [0] x y)
 (EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y))
 (EqualUint64x2 ...) => (VPCMPEQQ128 ...)
 (EqualUint64x4 ...) => (VPCMPEQQ256 ...)
 (EqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
-(EqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(EqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(EqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [0] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(EqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [0] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(EqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [0] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(EqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [0] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(EqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [0] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(EqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [0] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(EqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [0] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(EqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [0] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(EqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [0] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(EqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [0] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(EqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [0] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(EqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [0] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(EqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [0] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(EqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [0] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(EqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [0] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(EqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [0] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(EqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [0] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(EqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [0] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(EqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [0] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(EqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [0] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(EqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [0] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(EqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [0] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(EqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [0] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(EqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [0] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(EqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [0] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(EqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(EqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(EqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
 (ExpandFloat32x4 x mask) => (VEXPANDPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
 (ExpandFloat32x8 x mask) => (VEXPANDPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
 (ExpandFloat32x16 x mask) => (VEXPANDPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
 (FloorScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+1] x)
 (FloorScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+1] x)
 (FloorScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+1] x)
-(FloorScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(FloorScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(FloorScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(FloorScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(FloorScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(FloorScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (FloorScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+1] x)
 (FloorScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+1] x)
 (FloorScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+1] x)
 (FloorScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+1] x)
 (FloorScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+1] x)
 (FloorScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+1] x)
-(FloorScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(FloorScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(FloorScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(FloorScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(FloorScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(FloorScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (GaloisFieldAffineTransformUint8x16 ...) => (VGF2P8AFFINEQB128 ...)
 (GaloisFieldAffineTransformUint8x32 ...) => (VGF2P8AFFINEQB256 ...)
 (GaloisFieldAffineTransformUint8x64 ...) => (VGF2P8AFFINEQB512 ...)
 (GaloisFieldAffineTransformInverseUint8x16 ...) => (VGF2P8AFFINEINVQB128 ...)
 (GaloisFieldAffineTransformInverseUint8x32 ...) => (VGF2P8AFFINEINVQB256 ...)
 (GaloisFieldAffineTransformInverseUint8x64 ...) => (VGF2P8AFFINEINVQB512 ...)
-(GaloisFieldAffineTransformInverseMaskedUint8x16 [a] x y mask) => (VGF2P8AFFINEINVQBMasked128 [a] x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(GaloisFieldAffineTransformInverseMaskedUint8x32 [a] x y mask) => (VGF2P8AFFINEINVQBMasked256 [a] x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(GaloisFieldAffineTransformInverseMaskedUint8x64 [a] x y mask) => (VGF2P8AFFINEINVQBMasked512 [a] x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(GaloisFieldAffineTransformMaskedUint8x16 [a] x y mask) => (VGF2P8AFFINEQBMasked128 [a] x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(GaloisFieldAffineTransformMaskedUint8x32 [a] x y mask) => (VGF2P8AFFINEQBMasked256 [a] x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(GaloisFieldAffineTransformMaskedUint8x64 [a] x y mask) => (VGF2P8AFFINEQBMasked512 [a] x y (VPMOVVec8x64ToM <types.TypeMask> mask))
 (GaloisFieldMulUint8x16 ...) => (VGF2P8MULB128 ...)
 (GaloisFieldMulUint8x32 ...) => (VGF2P8MULB256 ...)
 (GaloisFieldMulUint8x64 ...) => (VGF2P8MULB512 ...)
-(GaloisFieldMulMaskedUint8x16 x y mask) => (VGF2P8MULBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(GaloisFieldMulMaskedUint8x32 x y mask) => (VGF2P8MULBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(GaloisFieldMulMaskedUint8x64 x y mask) => (VGF2P8MULBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
 (GetElemFloat32x4 ...) => (VPEXTRD128 ...)
 (GetElemFloat64x2 ...) => (VPEXTRQ128 ...)
 (GetElemInt8x16 ...) => (VPEXTRB128 ...)
 (GreaterEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [13] x y))
 (GreaterEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [13] x y))
 (GreaterEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y))
-(GreaterEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [13] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [13] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [13] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [13] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [13] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [13] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [13] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [13] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [13] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [13] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [13] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [13] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [13] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [13] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [13] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [13] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [13] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [13] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [13] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [13] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [13] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [13] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [13] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [13] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [13] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [13] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [13] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [13] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [13] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(GreaterEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [13] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(GreaterMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [14] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(GreaterMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [14] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(GreaterMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [14] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(GreaterMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [14] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(GreaterMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [14] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(GreaterMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [14] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(GreaterMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [14] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(GreaterMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [14] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(GreaterMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [14] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(GreaterMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [14] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(GreaterMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [14] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(GreaterMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [14] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(GreaterMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [14] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(GreaterMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [14] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(GreaterMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [14] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(GreaterMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [14] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(GreaterMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [14] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(GreaterMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [14] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(GreaterMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [14] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(GreaterMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [14] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(GreaterMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [14] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(GreaterMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [14] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(GreaterMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [14] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(GreaterMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [14] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(GreaterMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [14] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(GreaterMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [14] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(GreaterMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [14] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(GreaterMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [14] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(GreaterMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [14] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(GreaterMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [14] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
 (IsNanFloat32x4 x y) => (VCMPPS128 [3] x y)
 (IsNanFloat32x8 x y) => (VCMPPS256 [3] x y)
 (IsNanFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [3] x y))
 (IsNanFloat64x2 x y) => (VCMPPD128 [3] x y)
 (IsNanFloat64x4 x y) => (VCMPPD256 [3] x y)
 (IsNanFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
-(IsNanMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [3] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(IsNanMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [3] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(IsNanMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [3] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(IsNanMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [3] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(IsNanMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [3] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(IsNanMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [3] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
 (LessFloat32x4 x y) => (VCMPPS128 [1] x y)
 (LessFloat32x8 x y) => (VCMPPS256 [1] x y)
 (LessFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [1] x y))
 (LessEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [2] x y))
 (LessEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [2] x y))
 (LessEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y))
-(LessEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(LessEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(LessEqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [2] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(LessEqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [2] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(LessEqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [2] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(LessEqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [2] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [2] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [2] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [2] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [2] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [2] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [2] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [2] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [2] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [2] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [2] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [2] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(LessEqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [2] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [2] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [2] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [2] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [2] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [2] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [2] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [2] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [2] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [2] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [2] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [2] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(LessEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [2] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(LessMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [1] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(LessMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [1] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(LessMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [1] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(LessMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [1] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(LessMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [1] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(LessMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [1] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(LessMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [1] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(LessMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [1] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(LessMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [1] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(LessMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [1] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(LessMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [1] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(LessMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [1] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(LessMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [1] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(LessMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [1] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(LessMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [1] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(LessMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [1] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(LessMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [1] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(LessMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [1] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(LessMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [1] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(LessMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [1] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(LessMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [1] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(LessMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [1] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(LessMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [1] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(LessMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [1] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(LessMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [1] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(LessMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [1] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(LessMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [1] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(LessMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [1] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(LessMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [1] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(LessMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [1] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
 (MaxFloat32x4 ...) => (VMAXPS128 ...)
 (MaxFloat32x8 ...) => (VMAXPS256 ...)
 (MaxFloat32x16 ...) => (VMAXPS512 ...)
 (MaxUint64x2 ...) => (VPMAXUQ128 ...)
 (MaxUint64x4 ...) => (VPMAXUQ256 ...)
 (MaxUint64x8 ...) => (VPMAXUQ512 ...)
-(MaxMaskedFloat32x4 x y mask) => (VMAXPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MaxMaskedFloat32x8 x y mask) => (VMAXPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MaxMaskedFloat32x16 x y mask) => (VMAXPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MaxMaskedFloat64x2 x y mask) => (VMAXPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MaxMaskedFloat64x4 x y mask) => (VMAXPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MaxMaskedFloat64x8 x y mask) => (VMAXPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(MaxMaskedInt8x16 x y mask) => (VPMAXSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(MaxMaskedInt8x32 x y mask) => (VPMAXSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(MaxMaskedInt8x64 x y mask) => (VPMAXSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(MaxMaskedInt16x8 x y mask) => (VPMAXSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MaxMaskedInt16x16 x y mask) => (VPMAXSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MaxMaskedInt16x32 x y mask) => (VPMAXSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MaxMaskedInt32x4 x y mask) => (VPMAXSDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MaxMaskedInt32x8 x y mask) => (VPMAXSDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MaxMaskedInt32x16 x y mask) => (VPMAXSDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MaxMaskedInt64x2 x y mask) => (VPMAXSQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MaxMaskedInt64x4 x y mask) => (VPMAXSQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MaxMaskedInt64x8 x y mask) => (VPMAXSQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(MaxMaskedUint8x16 x y mask) => (VPMAXUBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(MaxMaskedUint8x32 x y mask) => (VPMAXUBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(MaxMaskedUint8x64 x y mask) => (VPMAXUBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(MaxMaskedUint16x8 x y mask) => (VPMAXUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MaxMaskedUint16x16 x y mask) => (VPMAXUWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MaxMaskedUint16x32 x y mask) => (VPMAXUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MaxMaskedUint32x4 x y mask) => (VPMAXUDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MaxMaskedUint32x8 x y mask) => (VPMAXUDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MaxMaskedUint32x16 x y mask) => (VPMAXUDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MaxMaskedUint64x2 x y mask) => (VPMAXUQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MaxMaskedUint64x4 x y mask) => (VPMAXUQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MaxMaskedUint64x8 x y mask) => (VPMAXUQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (MinFloat32x4 ...) => (VMINPS128 ...)
 (MinFloat32x8 ...) => (VMINPS256 ...)
 (MinFloat32x16 ...) => (VMINPS512 ...)
 (MinUint64x2 ...) => (VPMINUQ128 ...)
 (MinUint64x4 ...) => (VPMINUQ256 ...)
 (MinUint64x8 ...) => (VPMINUQ512 ...)
-(MinMaskedFloat32x4 x y mask) => (VMINPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MinMaskedFloat32x8 x y mask) => (VMINPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MinMaskedFloat32x16 x y mask) => (VMINPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MinMaskedFloat64x2 x y mask) => (VMINPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MinMaskedFloat64x4 x y mask) => (VMINPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MinMaskedFloat64x8 x y mask) => (VMINPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(MinMaskedInt8x16 x y mask) => (VPMINSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(MinMaskedInt8x32 x y mask) => (VPMINSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(MinMaskedInt8x64 x y mask) => (VPMINSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(MinMaskedInt16x8 x y mask) => (VPMINSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MinMaskedInt16x16 x y mask) => (VPMINSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MinMaskedInt16x32 x y mask) => (VPMINSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MinMaskedInt32x4 x y mask) => (VPMINSDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MinMaskedInt32x8 x y mask) => (VPMINSDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MinMaskedInt32x16 x y mask) => (VPMINSDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MinMaskedInt64x2 x y mask) => (VPMINSQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MinMaskedInt64x4 x y mask) => (VPMINSQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MinMaskedInt64x8 x y mask) => (VPMINSQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(MinMaskedUint8x16 x y mask) => (VPMINUBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(MinMaskedUint8x32 x y mask) => (VPMINUBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(MinMaskedUint8x64 x y mask) => (VPMINUBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(MinMaskedUint16x8 x y mask) => (VPMINUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MinMaskedUint16x16 x y mask) => (VPMINUWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MinMaskedUint16x32 x y mask) => (VPMINUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MinMaskedUint32x4 x y mask) => (VPMINUDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MinMaskedUint32x8 x y mask) => (VPMINUDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MinMaskedUint32x16 x y mask) => (VPMINUDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MinMaskedUint64x2 x y mask) => (VPMINUQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MinMaskedUint64x4 x y mask) => (VPMINUQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MinMaskedUint64x8 x y mask) => (VPMINUQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (MulFloat32x4 ...) => (VMULPS128 ...)
 (MulFloat32x8 ...) => (VMULPS256 ...)
 (MulFloat32x16 ...) => (VMULPS512 ...)
 (MulAddFloat64x2 ...) => (VFMADD213PD128 ...)
 (MulAddFloat64x4 ...) => (VFMADD213PD256 ...)
 (MulAddFloat64x8 ...) => (VFMADD213PD512 ...)
-(MulAddMaskedFloat32x4 x y z mask) => (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MulAddMaskedFloat32x8 x y z mask) => (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MulAddMaskedFloat32x16 x y z mask) => (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MulAddMaskedFloat64x2 x y z mask) => (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MulAddMaskedFloat64x4 x y z mask) => (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MulAddMaskedFloat64x8 x y z mask) => (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
 (MulAddSubFloat32x4 ...) => (VFMADDSUB213PS128 ...)
 (MulAddSubFloat32x8 ...) => (VFMADDSUB213PS256 ...)
 (MulAddSubFloat32x16 ...) => (VFMADDSUB213PS512 ...)
 (MulAddSubFloat64x2 ...) => (VFMADDSUB213PD128 ...)
 (MulAddSubFloat64x4 ...) => (VFMADDSUB213PD256 ...)
 (MulAddSubFloat64x8 ...) => (VFMADDSUB213PD512 ...)
-(MulAddSubMaskedFloat32x4 x y z mask) => (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MulAddSubMaskedFloat32x8 x y z mask) => (VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MulAddSubMaskedFloat32x16 x y z mask) => (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MulAddSubMaskedFloat64x2 x y z mask) => (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MulAddSubMaskedFloat64x4 x y z mask) => (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MulAddSubMaskedFloat64x8 x y z mask) => (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
 (MulEvenWidenInt32x4 ...) => (VPMULDQ128 ...)
 (MulEvenWidenInt32x8 ...) => (VPMULDQ256 ...)
 (MulEvenWidenUint32x4 ...) => (VPMULUDQ128 ...)
 (MulHighUint16x8 ...) => (VPMULHUW128 ...)
 (MulHighUint16x16 ...) => (VPMULHUW256 ...)
 (MulHighUint16x32 ...) => (VPMULHUW512 ...)
-(MulHighMaskedInt16x8 x y mask) => (VPMULHWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MulHighMaskedInt16x16 x y mask) => (VPMULHWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MulHighMaskedInt16x32 x y mask) => (VPMULHWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MulHighMaskedUint16x8 x y mask) => (VPMULHUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MulHighMaskedUint16x16 x y mask) => (VPMULHUWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MulHighMaskedUint16x32 x y mask) => (VPMULHUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MulMaskedFloat32x4 x y mask) => (VMULPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MulMaskedFloat32x8 x y mask) => (VMULPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MulMaskedFloat32x16 x y mask) => (VMULPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MulMaskedFloat64x2 x y mask) => (VMULPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MulMaskedFloat64x4 x y mask) => (VMULPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MulMaskedFloat64x8 x y mask) => (VMULPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(MulMaskedInt16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MulMaskedInt16x16 x y mask) => (VPMULLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MulMaskedInt16x32 x y mask) => (VPMULLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MulMaskedInt32x4 x y mask) => (VPMULLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MulMaskedInt32x8 x y mask) => (VPMULLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MulMaskedInt32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MulMaskedInt64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MulMaskedInt64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MulMaskedInt64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(MulMaskedUint16x8 x y mask) => (VPMULLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(MulMaskedUint16x16 x y mask) => (VPMULLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(MulMaskedUint16x32 x y mask) => (VPMULLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(MulMaskedUint32x4 x y mask) => (VPMULLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MulMaskedUint32x8 x y mask) => (VPMULLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MulMaskedUint32x16 x y mask) => (VPMULLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MulMaskedUint64x2 x y mask) => (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MulMaskedUint64x4 x y mask) => (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MulMaskedUint64x8 x y mask) => (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (MulSubAddFloat32x4 ...) => (VFMSUBADD213PS128 ...)
 (MulSubAddFloat32x8 ...) => (VFMSUBADD213PS256 ...)
 (MulSubAddFloat32x16 ...) => (VFMSUBADD213PS512 ...)
 (MulSubAddFloat64x2 ...) => (VFMSUBADD213PD128 ...)
 (MulSubAddFloat64x4 ...) => (VFMSUBADD213PD256 ...)
 (MulSubAddFloat64x8 ...) => (VFMSUBADD213PD512 ...)
-(MulSubAddMaskedFloat32x4 x y z mask) => (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(MulSubAddMaskedFloat32x8 x y z mask) => (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(MulSubAddMaskedFloat32x16 x y z mask) => (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(MulSubAddMaskedFloat64x2 x y z mask) => (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(MulSubAddMaskedFloat64x4 x y z mask) => (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(MulSubAddMaskedFloat64x8 x y z mask) => (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
 (NotEqualFloat32x4 x y) => (VCMPPS128 [4] x y)
 (NotEqualFloat32x8 x y) => (VCMPPS256 [4] x y)
 (NotEqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [4] x y))
 (NotEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [4] x y))
 (NotEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [4] x y))
 (NotEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y))
-(NotEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(NotEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(NotEqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [4] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(NotEqualMaskedFloat64x2 x y mask) => (VPMOVMToVec64x2 (VCMPPDMasked128 [4] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(NotEqualMaskedFloat64x4 x y mask) => (VPMOVMToVec64x4 (VCMPPDMasked256 [4] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(NotEqualMaskedFloat64x8 x y mask) => (VPMOVMToVec64x8 (VCMPPDMasked512 [4] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPBMasked128 [4] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPBMasked256 [4] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPBMasked512 [4] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPWMasked128 [4] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPWMasked256 [4] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPWMasked512 [4] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPDMasked128 [4] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPDMasked256 [4] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPDMasked512 [4] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPQMasked128 [4] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPQMasked256 [4] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(NotEqualMaskedInt64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPQMasked512 [4] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint8x16 x y mask) => (VPMOVMToVec8x16 (VPCMPUBMasked128 [4] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint8x32 x y mask) => (VPMOVMToVec8x32 (VPCMPUBMasked256 [4] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint8x64 x y mask) => (VPMOVMToVec8x64 (VPCMPUBMasked512 [4] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint16x8 x y mask) => (VPMOVMToVec16x8 (VPCMPUWMasked128 [4] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint16x16 x y mask) => (VPMOVMToVec16x16 (VPCMPUWMasked256 [4] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint16x32 x y mask) => (VPMOVMToVec16x32 (VPCMPUWMasked512 [4] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint32x4 x y mask) => (VPMOVMToVec32x4 (VPCMPUDMasked128 [4] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint32x8 x y mask) => (VPMOVMToVec32x8 (VPCMPUDMasked256 [4] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint32x16 x y mask) => (VPMOVMToVec32x16 (VPCMPUDMasked512 [4] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint64x2 x y mask) => (VPMOVMToVec64x2 (VPCMPUQMasked128 [4] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint64x4 x y mask) => (VPMOVMToVec64x4 (VPCMPUQMasked256 [4] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-(NotEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
 (OnesCountInt8x16 ...) => (VPOPCNTB128 ...)
 (OnesCountInt8x32 ...) => (VPOPCNTB256 ...)
 (OnesCountInt8x64 ...) => (VPOPCNTB512 ...)
 (OnesCountUint64x2 ...) => (VPOPCNTQ128 ...)
 (OnesCountUint64x4 ...) => (VPOPCNTQ256 ...)
 (OnesCountUint64x8 ...) => (VPOPCNTQ512 ...)
-(OnesCountMaskedInt8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(OnesCountMaskedInt8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
-(OnesCountMaskedInt8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
-(OnesCountMaskedInt16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(OnesCountMaskedInt16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(OnesCountMaskedInt16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
-(OnesCountMaskedInt32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(OnesCountMaskedInt32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(OnesCountMaskedInt32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(OnesCountMaskedInt64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(OnesCountMaskedInt64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(OnesCountMaskedInt64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(OnesCountMaskedUint8x16 x mask) => (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-(OnesCountMaskedUint8x32 x mask) => (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
-(OnesCountMaskedUint8x64 x mask) => (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
-(OnesCountMaskedUint16x8 x mask) => (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-(OnesCountMaskedUint16x16 x mask) => (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
-(OnesCountMaskedUint16x32 x mask) => (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
-(OnesCountMaskedUint32x4 x mask) => (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(OnesCountMaskedUint32x8 x mask) => (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(OnesCountMaskedUint32x16 x mask) => (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(OnesCountMaskedUint64x2 x mask) => (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(OnesCountMaskedUint64x4 x mask) => (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(OnesCountMaskedUint64x8 x mask) => (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (OrInt8x16 ...) => (VPOR128 ...)
 (OrInt8x32 ...) => (VPOR256 ...)
 (OrInt8x64 ...) => (VPORD512 ...)
 (OrUint64x2 ...) => (VPOR128 ...)
 (OrUint64x4 ...) => (VPOR256 ...)
 (OrUint64x8 ...) => (VPORQ512 ...)
-(OrMaskedInt32x4 x y mask) => (VPORDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(OrMaskedInt32x8 x y mask) => (VPORDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(OrMaskedInt32x16 x y mask) => (VPORDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(OrMaskedInt64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(OrMaskedInt64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(OrMaskedInt64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(OrMaskedUint32x4 x y mask) => (VPORDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(OrMaskedUint32x8 x y mask) => (VPORDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(OrMaskedUint32x16 x y mask) => (VPORDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(OrMaskedUint64x2 x y mask) => (VPORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(OrMaskedUint64x4 x y mask) => (VPORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(OrMaskedUint64x8 x y mask) => (VPORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (PermuteFloat32x8 ...) => (VPERMPS256 ...)
 (PermuteFloat32x16 ...) => (VPERMPS512 ...)
 (PermuteFloat64x4 ...) => (VPERMPD256 ...)
 (Permute2Uint64x2 ...) => (VPERMI2Q128 ...)
 (Permute2Uint64x4 ...) => (VPERMI2Q256 ...)
 (Permute2Uint64x8 ...) => (VPERMI2Q512 ...)
-(Permute2MaskedFloat32x4 x y z mask) => (VPERMI2PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Permute2MaskedFloat32x8 x y z mask) => (VPERMI2PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(Permute2MaskedFloat32x16 x y z mask) => (VPERMI2PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(Permute2MaskedFloat64x2 x y z mask) => (VPERMI2PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Permute2MaskedFloat64x4 x y z mask) => (VPERMI2PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(Permute2MaskedFloat64x8 x y z mask) => (VPERMI2PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-(Permute2MaskedInt8x16 x y z mask) => (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM <types.TypeMask> mask))
-(Permute2MaskedInt8x32 x y z mask) => (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM <types.TypeMask> mask))
-(Permute2MaskedInt8x64 x y z mask) => (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM <types.TypeMask> mask))
-(Permute2MaskedInt16x8 x y z mask) => (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
-(Permute2MaskedInt16x16 x y z mask) => (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
-(Permute2MaskedInt16x32 x y z mask) => (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
-(Permute2MaskedInt32x4 x y z mask) => (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Permute2MaskedInt32x8 x y z mask) => (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(Permute2MaskedInt32x16 x y z mask) => (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(Permute2MaskedInt64x2 x y z mask) => (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Permute2MaskedInt64x4 x y z mask) => (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(Permute2MaskedInt64x8 x y z mask) => (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-(Permute2MaskedUint8x16 x y z mask) => (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM <types.TypeMask> mask))
-(Permute2MaskedUint8x32 x y z mask) => (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM <types.TypeMask> mask))
-(Permute2MaskedUint8x64 x y z mask) => (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM <types.TypeMask> mask))
-(Permute2MaskedUint16x8 x y z mask) => (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
-(Permute2MaskedUint16x16 x y z mask) => (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
-(Permute2MaskedUint16x32 x y z mask) => (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
-(Permute2MaskedUint32x4 x y z mask) => (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(Permute2MaskedUint32x8 x y z mask) => (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(Permute2MaskedUint32x16 x y z mask) => (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(Permute2MaskedUint64x2 x y z mask) => (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(Permute2MaskedUint64x4 x y z mask) => (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(Permute2MaskedUint64x8 x y z mask) => (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-(PermuteMaskedFloat32x8 x y mask) => (VPERMPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(PermuteMaskedFloat32x16 x y mask) => (VPERMPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(PermuteMaskedFloat64x4 x y mask) => (VPERMPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(PermuteMaskedFloat64x8 x y mask) => (VPERMPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(PermuteMaskedInt8x16 x y mask) => (VPERMBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(PermuteMaskedInt8x32 x y mask) => (VPERMBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(PermuteMaskedInt8x64 x y mask) => (VPERMBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(PermuteMaskedInt16x8 x y mask) => (VPERMWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(PermuteMaskedInt16x16 x y mask) => (VPERMWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(PermuteMaskedInt16x32 x y mask) => (VPERMWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(PermuteMaskedInt32x8 x y mask) => (VPERMDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(PermuteMaskedInt32x16 x y mask) => (VPERMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(PermuteMaskedInt64x4 x y mask) => (VPERMQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(PermuteMaskedInt64x8 x y mask) => (VPERMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(PermuteMaskedUint8x16 x y mask) => (VPERMBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(PermuteMaskedUint8x32 x y mask) => (VPERMBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(PermuteMaskedUint8x64 x y mask) => (VPERMBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(PermuteMaskedUint16x8 x y mask) => (VPERMWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(PermuteMaskedUint16x16 x y mask) => (VPERMWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(PermuteMaskedUint16x32 x y mask) => (VPERMWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(PermuteMaskedUint32x8 x y mask) => (VPERMDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(PermuteMaskedUint32x16 x y mask) => (VPERMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(PermuteMaskedUint64x4 x y mask) => (VPERMQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(PermuteMaskedUint64x8 x y mask) => (VPERMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ReciprocalFloat32x4 ...) => (VRCPPS128 ...)
 (ReciprocalFloat32x8 ...) => (VRCPPS256 ...)
 (ReciprocalFloat32x16 ...) => (VRCP14PS512 ...)
 (ReciprocalFloat64x2 ...) => (VRCP14PD128 ...)
 (ReciprocalFloat64x4 ...) => (VRCP14PD256 ...)
 (ReciprocalFloat64x8 ...) => (VRCP14PD512 ...)
-(ReciprocalMaskedFloat32x4 x mask) => (VRCP14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ReciprocalMaskedFloat32x8 x mask) => (VRCP14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ReciprocalMaskedFloat32x16 x mask) => (VRCP14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ReciprocalMaskedFloat64x2 x mask) => (VRCP14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ReciprocalMaskedFloat64x4 x mask) => (VRCP14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ReciprocalMaskedFloat64x8 x mask) => (VRCP14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ReciprocalSqrtFloat32x4 ...) => (VRSQRTPS128 ...)
 (ReciprocalSqrtFloat32x8 ...) => (VRSQRTPS256 ...)
 (ReciprocalSqrtFloat32x16 ...) => (VRSQRT14PS512 ...)
 (ReciprocalSqrtFloat64x2 ...) => (VRSQRT14PD128 ...)
 (ReciprocalSqrtFloat64x4 ...) => (VRSQRT14PD256 ...)
 (ReciprocalSqrtFloat64x8 ...) => (VRSQRT14PD512 ...)
-(ReciprocalSqrtMaskedFloat32x4 x mask) => (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ReciprocalSqrtMaskedFloat32x8 x mask) => (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ReciprocalSqrtMaskedFloat32x16 x mask) => (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ReciprocalSqrtMaskedFloat64x2 x mask) => (VRSQRT14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ReciprocalSqrtMaskedFloat64x4 x mask) => (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ReciprocalSqrtMaskedFloat64x8 x mask) => (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (RotateAllLeftInt32x4 ...) => (VPROLD128 ...)
 (RotateAllLeftInt32x8 ...) => (VPROLD256 ...)
 (RotateAllLeftInt32x16 ...) => (VPROLD512 ...)
 (RotateAllLeftUint64x2 ...) => (VPROLQ128 ...)
 (RotateAllLeftUint64x4 ...) => (VPROLQ256 ...)
 (RotateAllLeftUint64x8 ...) => (VPROLQ512 ...)
-(RotateAllLeftMaskedInt32x4 [a] x mask) => (VPROLDMasked128 [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedInt32x8 [a] x mask) => (VPROLDMasked256 [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedInt32x16 [a] x mask) => (VPROLDMasked512 [a] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedInt64x2 [a] x mask) => (VPROLQMasked128 [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedInt64x4 [a] x mask) => (VPROLQMasked256 [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedInt64x8 [a] x mask) => (VPROLQMasked512 [a] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedUint32x4 [a] x mask) => (VPROLDMasked128 [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedUint32x8 [a] x mask) => (VPROLDMasked256 [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedUint32x16 [a] x mask) => (VPROLDMasked512 [a] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedUint64x2 [a] x mask) => (VPROLQMasked128 [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedUint64x4 [a] x mask) => (VPROLQMasked256 [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RotateAllLeftMaskedUint64x8 [a] x mask) => (VPROLQMasked512 [a] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (RotateAllRightInt32x4 ...) => (VPRORD128 ...)
 (RotateAllRightInt32x8 ...) => (VPRORD256 ...)
 (RotateAllRightInt32x16 ...) => (VPRORD512 ...)
 (RotateAllRightUint64x2 ...) => (VPRORQ128 ...)
 (RotateAllRightUint64x4 ...) => (VPRORQ256 ...)
 (RotateAllRightUint64x8 ...) => (VPRORQ512 ...)
-(RotateAllRightMaskedInt32x4 [a] x mask) => (VPRORDMasked128 [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RotateAllRightMaskedInt32x8 [a] x mask) => (VPRORDMasked256 [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RotateAllRightMaskedInt32x16 [a] x mask) => (VPRORDMasked512 [a] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RotateAllRightMaskedInt64x2 [a] x mask) => (VPRORQMasked128 [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RotateAllRightMaskedInt64x4 [a] x mask) => (VPRORQMasked256 [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RotateAllRightMaskedInt64x8 [a] x mask) => (VPRORQMasked512 [a] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(RotateAllRightMaskedUint32x4 [a] x mask) => (VPRORDMasked128 [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RotateAllRightMaskedUint32x8 [a] x mask) => (VPRORDMasked256 [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RotateAllRightMaskedUint32x16 [a] x mask) => (VPRORDMasked512 [a] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RotateAllRightMaskedUint64x2 [a] x mask) => (VPRORQMasked128 [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RotateAllRightMaskedUint64x4 [a] x mask) => (VPRORQMasked256 [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RotateAllRightMaskedUint64x8 [a] x mask) => (VPRORQMasked512 [a] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (RotateLeftInt32x4 ...) => (VPROLVD128 ...)
 (RotateLeftInt32x8 ...) => (VPROLVD256 ...)
 (RotateLeftInt32x16 ...) => (VPROLVD512 ...)
 (RotateLeftUint64x2 ...) => (VPROLVQ128 ...)
 (RotateLeftUint64x4 ...) => (VPROLVQ256 ...)
 (RotateLeftUint64x8 ...) => (VPROLVQ512 ...)
-(RotateLeftMaskedInt32x4 x y mask) => (VPROLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RotateLeftMaskedInt32x8 x y mask) => (VPROLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RotateLeftMaskedInt32x16 x y mask) => (VPROLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RotateLeftMaskedInt64x2 x y mask) => (VPROLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RotateLeftMaskedInt64x4 x y mask) => (VPROLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RotateLeftMaskedInt64x8 x y mask) => (VPROLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(RotateLeftMaskedUint32x4 x y mask) => (VPROLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RotateLeftMaskedUint32x8 x y mask) => (VPROLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RotateLeftMaskedUint32x16 x y mask) => (VPROLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RotateLeftMaskedUint64x2 x y mask) => (VPROLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RotateLeftMaskedUint64x4 x y mask) => (VPROLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RotateLeftMaskedUint64x8 x y mask) => (VPROLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (RotateRightInt32x4 ...) => (VPRORVD128 ...)
 (RotateRightInt32x8 ...) => (VPRORVD256 ...)
 (RotateRightInt32x16 ...) => (VPRORVD512 ...)
 (RotateRightUint64x2 ...) => (VPRORVQ128 ...)
 (RotateRightUint64x4 ...) => (VPRORVQ256 ...)
 (RotateRightUint64x8 ...) => (VPRORVQ512 ...)
-(RotateRightMaskedInt32x4 x y mask) => (VPRORVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RotateRightMaskedInt32x8 x y mask) => (VPRORVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RotateRightMaskedInt32x16 x y mask) => (VPRORVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RotateRightMaskedInt64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RotateRightMaskedInt64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RotateRightMaskedInt64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(RotateRightMaskedUint32x4 x y mask) => (VPRORVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RotateRightMaskedUint32x8 x y mask) => (VPRORVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RotateRightMaskedUint32x16 x y mask) => (VPRORVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RotateRightMaskedUint64x2 x y mask) => (VPRORVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RotateRightMaskedUint64x4 x y mask) => (VPRORVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RotateRightMaskedUint64x8 x y mask) => (VPRORVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (RoundToEvenFloat32x4 x) => (VROUNDPS128 [0] x)
 (RoundToEvenFloat32x8 x) => (VROUNDPS256 [0] x)
 (RoundToEvenFloat64x2 x) => (VROUNDPD128 [0] x)
 (RoundToEvenScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+0] x)
 (RoundToEvenScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+0] x)
 (RoundToEvenScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+0] x)
-(RoundToEvenScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RoundToEvenScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RoundToEvenScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RoundToEvenScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RoundToEvenScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RoundToEvenScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (RoundToEvenScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+0] x)
 (RoundToEvenScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+0] x)
 (RoundToEvenScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+0] x)
 (RoundToEvenScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+0] x)
 (RoundToEvenScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+0] x)
 (RoundToEvenScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+0] x)
-(RoundToEvenScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(RoundToEvenScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(RoundToEvenScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(RoundToEvenScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(RoundToEvenScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(RoundToEvenScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ScaleFloat32x4 ...) => (VSCALEFPS128 ...)
 (ScaleFloat32x8 ...) => (VSCALEFPS256 ...)
 (ScaleFloat32x16 ...) => (VSCALEFPS512 ...)
 (ScaleFloat64x2 ...) => (VSCALEFPD128 ...)
 (ScaleFloat64x4 ...) => (VSCALEFPD256 ...)
 (ScaleFloat64x8 ...) => (VSCALEFPD512 ...)
-(ScaleMaskedFloat32x4 x y mask) => (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ScaleMaskedFloat32x8 x y mask) => (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ScaleMaskedFloat32x16 x y mask) => (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ScaleMaskedFloat64x2 x y mask) => (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ScaleMaskedFloat64x4 x y mask) => (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ScaleMaskedFloat64x8 x y mask) => (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (SetElemFloat32x4 ...) => (VPINSRD128 ...)
 (SetElemFloat64x2 ...) => (VPINSRQ128 ...)
 (SetElemInt8x16 ...) => (VPINSRB128 ...)
 (ShiftAllLeftConcatUint64x2 ...) => (VPSHLDQ128 ...)
 (ShiftAllLeftConcatUint64x4 ...) => (VPSHLDQ256 ...)
 (ShiftAllLeftConcatUint64x8 ...) => (VPSHLDQ512 ...)
-(ShiftAllLeftConcatMaskedInt16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedInt16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedInt16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedInt32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedInt32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedInt32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedInt64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedInt64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedInt64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint16x8 [a] x y mask) => (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint16x16 [a] x y mask) => (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint16x32 [a] x y mask) => (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint32x4 [a] x y mask) => (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint32x8 [a] x y mask) => (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint32x16 [a] x y mask) => (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllLeftConcatMaskedUint64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(VPSLLWMasked128 x (MOVQconst [c]) mask) => (VPSLLWMasked128const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPSLLWMasked256 x (MOVQconst [c]) mask) => (VPSLLWMasked256const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPSLLWMasked512 x (MOVQconst [c]) mask) => (VPSLLWMasked512const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(VPSLLDMasked128 x (MOVQconst [c]) mask) => (VPSLLDMasked128const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPSLLDMasked256 x (MOVQconst [c]) mask) => (VPSLLDMasked256const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPSLLDMasked512 x (MOVQconst [c]) mask) => (VPSLLDMasked512const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(VPSLLQMasked128 x (MOVQconst [c]) mask) => (VPSLLQMasked128const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPSLLQMasked256 x (MOVQconst [c]) mask) => (VPSLLQMasked256const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPSLLQMasked512 x (MOVQconst [c]) mask) => (VPSLLQMasked512const [uint8(c)] x mask)
-(ShiftAllLeftMaskedInt64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllLeftMaskedUint64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ShiftAllRightInt16x8 ...) => (VPSRAW128 ...)
 (VPSRAW128 x (MOVQconst [c])) => (VPSRAW128const [uint8(c)] x)
 (ShiftAllRightInt16x16 ...) => (VPSRAW256 ...)
 (ShiftAllRightConcatUint64x2 ...) => (VPSHRDQ128 ...)
 (ShiftAllRightConcatUint64x4 ...) => (VPSHRDQ256 ...)
 (ShiftAllRightConcatUint64x8 ...) => (VPSHRDQ512 ...)
-(ShiftAllRightConcatMaskedInt16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedInt16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedInt16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedInt32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedInt32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedInt32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedInt64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedInt64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedInt64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint16x8 [a] x y mask) => (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint16x16 [a] x y mask) => (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint16x32 [a] x y mask) => (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint32x4 [a] x y mask) => (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint32x8 [a] x y mask) => (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint32x16 [a] x y mask) => (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllRightConcatMaskedUint64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(VPSRAWMasked128 x (MOVQconst [c]) mask) => (VPSRAWMasked128const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt16x8 x y mask) => (VPSRAWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(VPSRAWMasked256 x (MOVQconst [c]) mask) => (VPSRAWMasked256const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt16x16 x y mask) => (VPSRAWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(VPSRAWMasked512 x (MOVQconst [c]) mask) => (VPSRAWMasked512const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt16x32 x y mask) => (VPSRAWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(VPSRADMasked128 x (MOVQconst [c]) mask) => (VPSRADMasked128const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt32x4 x y mask) => (VPSRADMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(VPSRADMasked256 x (MOVQconst [c]) mask) => (VPSRADMasked256const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt32x8 x y mask) => (VPSRADMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(VPSRADMasked512 x (MOVQconst [c]) mask) => (VPSRADMasked512const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt32x16 x y mask) => (VPSRADMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(VPSRAQMasked128 x (MOVQconst [c]) mask) => (VPSRAQMasked128const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(VPSRAQMasked256 x (MOVQconst [c]) mask) => (VPSRAQMasked256const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(VPSRAQMasked512 x (MOVQconst [c]) mask) => (VPSRAQMasked512const [uint8(c)] x mask)
-(ShiftAllRightMaskedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint16x8 x y mask) => (VPSRLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint16x16 x y mask) => (VPSRLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint16x32 x y mask) => (VPSRLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint32x4 x y mask) => (VPSRLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint32x8 x y mask) => (VPSRLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint32x16 x y mask) => (VPSRLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftAllRightMaskedUint64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ShiftLeftInt16x8 ...) => (VPSLLVW128 ...)
 (ShiftLeftInt16x16 ...) => (VPSLLVW256 ...)
 (ShiftLeftInt16x32 ...) => (VPSLLVW512 ...)
 (ShiftLeftConcatUint64x2 ...) => (VPSHLDVQ128 ...)
 (ShiftLeftConcatUint64x4 ...) => (VPSHLDVQ256 ...)
 (ShiftLeftConcatUint64x8 ...) => (VPSHLDVQ512 ...)
-(ShiftLeftConcatMaskedInt16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedInt16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedInt16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedInt32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedInt32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedInt32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedInt64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedInt64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedInt64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint16x8 x y z mask) => (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint16x16 x y z mask) => (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint16x32 x y z mask) => (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint32x4 x y z mask) => (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint32x8 x y z mask) => (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint32x16 x y z mask) => (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint64x2 x y z mask) => (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint64x4 x y z mask) => (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftLeftConcatMaskedUint64x8 x y z mask) => (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt16x8 x y mask) => (VPSLLVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt16x16 x y mask) => (VPSLLVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt16x32 x y mask) => (VPSLLVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt32x4 x y mask) => (VPSLLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt32x8 x y mask) => (VPSLLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt32x16 x y mask) => (VPSLLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftLeftMaskedInt64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint16x8 x y mask) => (VPSLLVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint16x16 x y mask) => (VPSLLVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint16x32 x y mask) => (VPSLLVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint32x4 x y mask) => (VPSLLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint32x8 x y mask) => (VPSLLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint32x16 x y mask) => (VPSLLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint64x2 x y mask) => (VPSLLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint64x4 x y mask) => (VPSLLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftLeftMaskedUint64x8 x y mask) => (VPSLLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (ShiftRightInt16x8 ...) => (VPSRAVW128 ...)
 (ShiftRightInt16x16 ...) => (VPSRAVW256 ...)
 (ShiftRightInt16x32 ...) => (VPSRAVW512 ...)
 (ShiftRightConcatUint64x2 ...) => (VPSHRDVQ128 ...)
 (ShiftRightConcatUint64x4 ...) => (VPSHRDVQ256 ...)
 (ShiftRightConcatUint64x8 ...) => (VPSHRDVQ512 ...)
-(ShiftRightConcatMaskedInt16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedInt16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedInt16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedInt32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedInt32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedInt32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedInt64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedInt64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedInt64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint16x8 x y z mask) => (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint16x16 x y z mask) => (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint16x32 x y z mask) => (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint32x4 x y z mask) => (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint32x8 x y z mask) => (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint32x16 x y z mask) => (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint64x2 x y z mask) => (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint64x4 x y z mask) => (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftRightConcatMaskedUint64x8 x y z mask) => (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt16x8 x y mask) => (VPSRAVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt16x16 x y mask) => (VPSRAVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt16x32 x y mask) => (VPSRAVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt32x4 x y mask) => (VPSRAVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt32x8 x y mask) => (VPSRAVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt32x16 x y mask) => (VPSRAVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt64x2 x y mask) => (VPSRAVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt64x4 x y mask) => (VPSRAVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftRightMaskedInt64x8 x y mask) => (VPSRAVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint16x8 x y mask) => (VPSRLVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint16x16 x y mask) => (VPSRLVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint16x32 x y mask) => (VPSRLVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint32x4 x y mask) => (VPSRLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint32x8 x y mask) => (VPSRLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint32x16 x y mask) => (VPSRLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint64x2 x y mask) => (VPSRLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint64x4 x y mask) => (VPSRLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(ShiftRightMaskedUint64x8 x y mask) => (VPSRLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (SqrtFloat32x4 ...) => (VSQRTPS128 ...)
 (SqrtFloat32x8 ...) => (VSQRTPS256 ...)
 (SqrtFloat32x16 ...) => (VSQRTPS512 ...)
 (SqrtFloat64x2 ...) => (VSQRTPD128 ...)
 (SqrtFloat64x4 ...) => (VSQRTPD256 ...)
 (SqrtFloat64x8 ...) => (VSQRTPD512 ...)
-(SqrtMaskedFloat32x4 x mask) => (VSQRTPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(SqrtMaskedFloat32x8 x mask) => (VSQRTPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(SqrtMaskedFloat32x16 x mask) => (VSQRTPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(SqrtMaskedFloat64x2 x mask) => (VSQRTPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(SqrtMaskedFloat64x4 x mask) => (VSQRTPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(SqrtMaskedFloat64x8 x mask) => (VSQRTPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (SubFloat32x4 ...) => (VSUBPS128 ...)
 (SubFloat32x8 ...) => (VSUBPS256 ...)
 (SubFloat32x16 ...) => (VSUBPS512 ...)
 (SubUint64x2 ...) => (VPSUBQ128 ...)
 (SubUint64x4 ...) => (VPSUBQ256 ...)
 (SubUint64x8 ...) => (VPSUBQ512 ...)
-(SubMaskedFloat32x4 x y mask) => (VSUBPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(SubMaskedFloat32x8 x y mask) => (VSUBPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(SubMaskedFloat32x16 x y mask) => (VSUBPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(SubMaskedFloat64x2 x y mask) => (VSUBPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(SubMaskedFloat64x4 x y mask) => (VSUBPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(SubMaskedFloat64x8 x y mask) => (VSUBPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(SubMaskedInt8x16 x y mask) => (VPSUBBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(SubMaskedInt8x32 x y mask) => (VPSUBBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(SubMaskedInt8x64 x y mask) => (VPSUBBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(SubMaskedInt16x8 x y mask) => (VPSUBWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(SubMaskedInt16x16 x y mask) => (VPSUBWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(SubMaskedInt16x32 x y mask) => (VPSUBWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(SubMaskedInt32x4 x y mask) => (VPSUBDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(SubMaskedInt32x8 x y mask) => (VPSUBDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(SubMaskedInt32x16 x y mask) => (VPSUBDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(SubMaskedInt64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(SubMaskedInt64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(SubMaskedInt64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(SubMaskedUint8x16 x y mask) => (VPSUBBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(SubMaskedUint8x32 x y mask) => (VPSUBBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(SubMaskedUint8x64 x y mask) => (VPSUBBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(SubMaskedUint16x8 x y mask) => (VPSUBWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(SubMaskedUint16x16 x y mask) => (VPSUBWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(SubMaskedUint16x32 x y mask) => (VPSUBWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(SubMaskedUint32x4 x y mask) => (VPSUBDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(SubMaskedUint32x8 x y mask) => (VPSUBDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(SubMaskedUint32x16 x y mask) => (VPSUBDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(SubMaskedUint64x2 x y mask) => (VPSUBQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(SubMaskedUint64x4 x y mask) => (VPSUBQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(SubMaskedUint64x8 x y mask) => (VPSUBQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (SubPairsFloat32x4 ...) => (VHSUBPS128 ...)
 (SubPairsFloat32x8 ...) => (VHSUBPS256 ...)
 (SubPairsFloat64x2 ...) => (VHSUBPD128 ...)
 (SubSaturatedUint16x8 ...) => (VPSUBUSW128 ...)
 (SubSaturatedUint16x16 ...) => (VPSUBUSW256 ...)
 (SubSaturatedUint16x32 ...) => (VPSUBUSW512 ...)
-(SubSaturatedMaskedInt8x16 x y mask) => (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(SubSaturatedMaskedInt8x32 x y mask) => (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(SubSaturatedMaskedInt8x64 x y mask) => (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(SubSaturatedMaskedInt16x8 x y mask) => (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(SubSaturatedMaskedInt16x16 x y mask) => (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(SubSaturatedMaskedInt16x32 x y mask) => (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint8x16 x y mask) => (VPSUBUSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint8x32 x y mask) => (VPSUBUSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint8x64 x y mask) => (VPSUBUSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint16x8 x y mask) => (VPSUBUSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint16x16 x y mask) => (VPSUBUSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-(SubSaturatedMaskedUint16x32 x y mask) => (VPSUBUSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
 (TruncFloat32x4 x) => (VROUNDPS128 [3] x)
 (TruncFloat32x8 x) => (VROUNDPS256 [3] x)
 (TruncFloat64x2 x) => (VROUNDPD128 [3] x)
 (TruncScaledFloat64x2 [a] x) => (VRNDSCALEPD128 [a+3] x)
 (TruncScaledFloat64x4 [a] x) => (VRNDSCALEPD256 [a+3] x)
 (TruncScaledFloat64x8 [a] x) => (VRNDSCALEPD512 [a+3] x)
-(TruncScaledMaskedFloat32x4 [a] x mask) => (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(TruncScaledMaskedFloat32x8 [a] x mask) => (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(TruncScaledMaskedFloat32x16 [a] x mask) => (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(TruncScaledMaskedFloat64x2 [a] x mask) => (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(TruncScaledMaskedFloat64x4 [a] x mask) => (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(TruncScaledMaskedFloat64x8 [a] x mask) => (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (TruncScaledResidueFloat32x4 [a] x) => (VREDUCEPS128 [a+3] x)
 (TruncScaledResidueFloat32x8 [a] x) => (VREDUCEPS256 [a+3] x)
 (TruncScaledResidueFloat32x16 [a] x) => (VREDUCEPS512 [a+3] x)
 (TruncScaledResidueFloat64x2 [a] x) => (VREDUCEPD128 [a+3] x)
 (TruncScaledResidueFloat64x4 [a] x) => (VREDUCEPD256 [a+3] x)
 (TruncScaledResidueFloat64x8 [a] x) => (VREDUCEPD512 [a+3] x)
-(TruncScaledResidueMaskedFloat32x4 [a] x mask) => (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-(TruncScaledResidueMaskedFloat32x8 [a] x mask) => (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-(TruncScaledResidueMaskedFloat32x16 [a] x mask) => (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-(TruncScaledResidueMaskedFloat64x2 [a] x mask) => (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-(TruncScaledResidueMaskedFloat64x4 [a] x mask) => (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-(TruncScaledResidueMaskedFloat64x8 [a] x mask) => (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
 (XorInt8x16 ...) => (VPXOR128 ...)
 (XorInt8x32 ...) => (VPXOR256 ...)
 (XorInt8x64 ...) => (VPXORD512 ...)
 (XorUint64x2 ...) => (VPXOR128 ...)
 (XorUint64x4 ...) => (VPXOR256 ...)
 (XorUint64x8 ...) => (VPXORQ512 ...)
-(XorMaskedInt32x4 x y mask) => (VPXORDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(XorMaskedInt32x8 x y mask) => (VPXORDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(XorMaskedInt32x16 x y mask) => (VPXORDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(XorMaskedInt64x2 x y mask) => (VPXORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(XorMaskedInt64x4 x y mask) => (VPXORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(XorMaskedInt64x8 x y mask) => (VPXORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(XorMaskedUint32x4 x y mask) => (VPXORDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-(XorMaskedUint32x8 x y mask) => (VPXORDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-(XorMaskedUint32x16 x y mask) => (VPXORDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-(XorMaskedUint64x2 x y mask) => (VPXORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-(XorMaskedUint64x4 x y mask) => (VPXORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-(XorMaskedUint64x8 x y mask) => (VPXORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
 (blendInt8x16 ...) => (VPBLENDVB128 ...)
 (blendInt8x32 ...) => (VPBLENDVB256 ...)
 (blendMaskedInt8x64 x y mask) => (VPBLENDMBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
index d98c0d8152acef16dba65527de3344f6e159e35b..08bfe36951154b51f0c8744de599511aff3c1c41 100644 (file)
@@ -16,36 +16,15 @@ func simdGenericOps() []opData {
                {name: "AbsInt64x2", argLength: 1, commutative: false},
                {name: "AbsInt64x4", argLength: 1, commutative: false},
                {name: "AbsInt64x8", argLength: 1, commutative: false},
-               {name: "AbsMaskedInt8x16", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt8x32", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt8x64", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt16x8", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt16x16", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt16x32", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt32x4", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt32x8", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt32x16", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt64x2", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt64x4", argLength: 2, commutative: false},
-               {name: "AbsMaskedInt64x8", argLength: 2, commutative: false},
                {name: "AddDotProdPairsSaturatedInt32x4", argLength: 3, commutative: false},
                {name: "AddDotProdPairsSaturatedInt32x8", argLength: 3, commutative: false},
                {name: "AddDotProdPairsSaturatedInt32x16", argLength: 3, commutative: false},
-               {name: "AddDotProdPairsSaturatedMaskedInt32x4", argLength: 4, commutative: false},
-               {name: "AddDotProdPairsSaturatedMaskedInt32x8", argLength: 4, commutative: false},
-               {name: "AddDotProdPairsSaturatedMaskedInt32x16", argLength: 4, commutative: false},
                {name: "AddDotProdQuadrupleInt32x4", argLength: 3, commutative: false},
                {name: "AddDotProdQuadrupleInt32x8", argLength: 3, commutative: false},
                {name: "AddDotProdQuadrupleInt32x16", argLength: 3, commutative: false},
-               {name: "AddDotProdQuadrupleMaskedInt32x4", argLength: 4, commutative: false},
-               {name: "AddDotProdQuadrupleMaskedInt32x8", argLength: 4, commutative: false},
-               {name: "AddDotProdQuadrupleMaskedInt32x16", argLength: 4, commutative: false},
                {name: "AddDotProdQuadrupleSaturatedInt32x4", argLength: 3, commutative: false},
                {name: "AddDotProdQuadrupleSaturatedInt32x8", argLength: 3, commutative: false},
                {name: "AddDotProdQuadrupleSaturatedInt32x16", argLength: 3, commutative: false},
-               {name: "AddDotProdQuadrupleSaturatedMaskedInt32x4", argLength: 4, commutative: false},
-               {name: "AddDotProdQuadrupleSaturatedMaskedInt32x8", argLength: 4, commutative: false},
-               {name: "AddDotProdQuadrupleSaturatedMaskedInt32x16", argLength: 4, commutative: false},
                {name: "AddFloat32x4", argLength: 2, commutative: true},
                {name: "AddFloat32x8", argLength: 2, commutative: true},
                {name: "AddFloat32x16", argLength: 2, commutative: true},
@@ -64,36 +43,6 @@ func simdGenericOps() []opData {
                {name: "AddInt64x2", argLength: 2, commutative: true},
                {name: "AddInt64x4", argLength: 2, commutative: true},
                {name: "AddInt64x8", argLength: 2, commutative: true},
-               {name: "AddMaskedFloat32x4", argLength: 3, commutative: true},
-               {name: "AddMaskedFloat32x8", argLength: 3, commutative: true},
-               {name: "AddMaskedFloat32x16", argLength: 3, commutative: true},
-               {name: "AddMaskedFloat64x2", argLength: 3, commutative: true},
-               {name: "AddMaskedFloat64x4", argLength: 3, commutative: true},
-               {name: "AddMaskedFloat64x8", argLength: 3, commutative: true},
-               {name: "AddMaskedInt8x16", argLength: 3, commutative: true},
-               {name: "AddMaskedInt8x32", argLength: 3, commutative: true},
-               {name: "AddMaskedInt8x64", argLength: 3, commutative: true},
-               {name: "AddMaskedInt16x8", argLength: 3, commutative: true},
-               {name: "AddMaskedInt16x16", argLength: 3, commutative: true},
-               {name: "AddMaskedInt16x32", argLength: 3, commutative: true},
-               {name: "AddMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "AddMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "AddMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "AddMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "AddMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "AddMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "AddMaskedUint8x16", argLength: 3, commutative: true},
-               {name: "AddMaskedUint8x32", argLength: 3, commutative: true},
-               {name: "AddMaskedUint8x64", argLength: 3, commutative: true},
-               {name: "AddMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "AddMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "AddMaskedUint16x32", argLength: 3, commutative: true},
-               {name: "AddMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "AddMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "AddMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "AddMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "AddMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "AddMaskedUint64x8", argLength: 3, commutative: true},
                {name: "AddPairsFloat32x4", argLength: 2, commutative: false},
                {name: "AddPairsFloat32x8", argLength: 2, commutative: false},
                {name: "AddPairsFloat64x2", argLength: 2, commutative: false},
@@ -114,18 +63,6 @@ func simdGenericOps() []opData {
                {name: "AddSaturatedInt16x8", argLength: 2, commutative: true},
                {name: "AddSaturatedInt16x16", argLength: 2, commutative: true},
                {name: "AddSaturatedInt16x32", argLength: 2, commutative: true},
-               {name: "AddSaturatedMaskedInt8x16", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedInt8x32", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedInt8x64", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedInt16x8", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedInt16x16", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedInt16x32", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedUint8x16", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedUint8x32", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedUint8x64", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "AddSaturatedMaskedUint16x32", argLength: 3, commutative: true},
                {name: "AddSaturatedUint8x16", argLength: 2, commutative: true},
                {name: "AddSaturatedUint8x32", argLength: 2, commutative: true},
                {name: "AddSaturatedUint8x64", argLength: 2, commutative: true},
@@ -160,18 +97,6 @@ func simdGenericOps() []opData {
                {name: "AndInt64x2", argLength: 2, commutative: true},
                {name: "AndInt64x4", argLength: 2, commutative: true},
                {name: "AndInt64x8", argLength: 2, commutative: true},
-               {name: "AndMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "AndMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "AndMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "AndMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "AndMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "AndMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "AndMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "AndMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "AndMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "AndMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "AndMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "AndMaskedUint64x8", argLength: 3, commutative: true},
                {name: "AndNotInt8x16", argLength: 2, commutative: false},
                {name: "AndNotInt8x32", argLength: 2, commutative: false},
                {name: "AndNotInt8x64", argLength: 2, commutative: false},
@@ -184,18 +109,6 @@ func simdGenericOps() []opData {
                {name: "AndNotInt64x2", argLength: 2, commutative: false},
                {name: "AndNotInt64x4", argLength: 2, commutative: false},
                {name: "AndNotInt64x8", argLength: 2, commutative: false},
-               {name: "AndNotMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "AndNotMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "AndNotMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "AndNotMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "AndNotMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "AndNotMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "AndNotMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "AndNotMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "AndNotMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "AndNotMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "AndNotMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "AndNotMaskedUint64x8", argLength: 3, commutative: false},
                {name: "AndNotUint8x16", argLength: 2, commutative: false},
                {name: "AndNotUint8x32", argLength: 2, commutative: false},
                {name: "AndNotUint8x64", argLength: 2, commutative: false},
@@ -220,12 +133,6 @@ func simdGenericOps() []opData {
                {name: "AndUint64x2", argLength: 2, commutative: true},
                {name: "AndUint64x4", argLength: 2, commutative: true},
                {name: "AndUint64x8", argLength: 2, commutative: true},
-               {name: "AverageMaskedUint8x16", argLength: 3, commutative: true},
-               {name: "AverageMaskedUint8x32", argLength: 3, commutative: true},
-               {name: "AverageMaskedUint8x64", argLength: 3, commutative: true},
-               {name: "AverageMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "AverageMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "AverageMaskedUint16x32", argLength: 3, commutative: true},
                {name: "AverageUint8x16", argLength: 2, commutative: true},
                {name: "AverageUint8x32", argLength: 2, commutative: true},
                {name: "AverageUint8x64", argLength: 2, commutative: true},
@@ -238,16 +145,6 @@ func simdGenericOps() []opData {
                {name: "Broadcast128Int16x8", argLength: 1, commutative: false},
                {name: "Broadcast128Int32x4", argLength: 1, commutative: false},
                {name: "Broadcast128Int64x2", argLength: 1, commutative: false},
-               {name: "Broadcast128MaskedFloat32x4", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedFloat64x2", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedInt8x16", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedInt16x8", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedInt32x4", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedInt64x2", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedUint8x16", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedUint16x8", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedUint32x4", argLength: 2, commutative: false},
-               {name: "Broadcast128MaskedUint64x2", argLength: 2, commutative: false},
                {name: "Broadcast128Uint8x16", argLength: 1, commutative: false},
                {name: "Broadcast128Uint16x8", argLength: 1, commutative: false},
                {name: "Broadcast128Uint32x4", argLength: 1, commutative: false},
@@ -258,16 +155,6 @@ func simdGenericOps() []opData {
                {name: "Broadcast256Int16x8", argLength: 1, commutative: false},
                {name: "Broadcast256Int32x4", argLength: 1, commutative: false},
                {name: "Broadcast256Int64x2", argLength: 1, commutative: false},
-               {name: "Broadcast256MaskedFloat32x4", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedFloat64x2", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedInt8x16", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedInt16x8", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedInt32x4", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedInt64x2", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedUint8x16", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedUint16x8", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedUint32x4", argLength: 2, commutative: false},
-               {name: "Broadcast256MaskedUint64x2", argLength: 2, commutative: false},
                {name: "Broadcast256Uint8x16", argLength: 1, commutative: false},
                {name: "Broadcast256Uint16x8", argLength: 1, commutative: false},
                {name: "Broadcast256Uint32x4", argLength: 1, commutative: false},
@@ -278,16 +165,6 @@ func simdGenericOps() []opData {
                {name: "Broadcast512Int16x8", argLength: 1, commutative: false},
                {name: "Broadcast512Int32x4", argLength: 1, commutative: false},
                {name: "Broadcast512Int64x2", argLength: 1, commutative: false},
-               {name: "Broadcast512MaskedFloat32x4", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedFloat64x2", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedInt8x16", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedInt16x8", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedInt32x4", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedInt64x2", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedUint8x16", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedUint16x8", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedUint32x4", argLength: 2, commutative: false},
-               {name: "Broadcast512MaskedUint64x2", argLength: 2, commutative: false},
                {name: "Broadcast512Uint8x16", argLength: 1, commutative: false},
                {name: "Broadcast512Uint16x8", argLength: 1, commutative: false},
                {name: "Broadcast512Uint32x4", argLength: 1, commutative: false},
@@ -329,15 +206,9 @@ func simdGenericOps() []opData {
                {name: "ConvertToInt32Float32x4", argLength: 1, commutative: false},
                {name: "ConvertToInt32Float32x8", argLength: 1, commutative: false},
                {name: "ConvertToInt32Float32x16", argLength: 1, commutative: false},
-               {name: "ConvertToInt32MaskedFloat32x4", argLength: 2, commutative: false},
-               {name: "ConvertToInt32MaskedFloat32x8", argLength: 2, commutative: false},
-               {name: "ConvertToInt32MaskedFloat32x16", argLength: 2, commutative: false},
                {name: "ConvertToUint32Float32x4", argLength: 1, commutative: false},
                {name: "ConvertToUint32Float32x8", argLength: 1, commutative: false},
                {name: "ConvertToUint32Float32x16", argLength: 1, commutative: false},
-               {name: "ConvertToUint32MaskedFloat32x4", argLength: 2, commutative: false},
-               {name: "ConvertToUint32MaskedFloat32x8", argLength: 2, commutative: false},
-               {name: "ConvertToUint32MaskedFloat32x16", argLength: 2, commutative: false},
                {name: "CopySignInt8x16", argLength: 2, commutative: false},
                {name: "CopySignInt8x32", argLength: 2, commutative: false},
                {name: "CopySignInt16x8", argLength: 2, commutative: false},
@@ -350,21 +221,9 @@ func simdGenericOps() []opData {
                {name: "DivFloat64x2", argLength: 2, commutative: false},
                {name: "DivFloat64x4", argLength: 2, commutative: false},
                {name: "DivFloat64x8", argLength: 2, commutative: false},
-               {name: "DivMaskedFloat32x4", argLength: 3, commutative: false},
-               {name: "DivMaskedFloat32x8", argLength: 3, commutative: false},
-               {name: "DivMaskedFloat32x16", argLength: 3, commutative: false},
-               {name: "DivMaskedFloat64x2", argLength: 3, commutative: false},
-               {name: "DivMaskedFloat64x4", argLength: 3, commutative: false},
-               {name: "DivMaskedFloat64x8", argLength: 3, commutative: false},
                {name: "DotProdPairsInt16x8", argLength: 2, commutative: false},
                {name: "DotProdPairsInt16x16", argLength: 2, commutative: false},
                {name: "DotProdPairsInt16x32", argLength: 2, commutative: false},
-               {name: "DotProdPairsMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "DotProdPairsMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "DotProdPairsMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "DotProdPairsSaturatedMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "DotProdPairsSaturatedMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "DotProdPairsSaturatedMaskedUint8x64", argLength: 3, commutative: false},
                {name: "DotProdPairsSaturatedUint8x16", argLength: 2, commutative: false},
                {name: "DotProdPairsSaturatedUint8x32", argLength: 2, commutative: false},
                {name: "DotProdPairsSaturatedUint8x64", argLength: 2, commutative: false},
@@ -386,36 +245,6 @@ func simdGenericOps() []opData {
                {name: "EqualInt64x2", argLength: 2, commutative: true},
                {name: "EqualInt64x4", argLength: 2, commutative: true},
                {name: "EqualInt64x8", argLength: 2, commutative: true},
-               {name: "EqualMaskedFloat32x4", argLength: 3, commutative: true},
-               {name: "EqualMaskedFloat32x8", argLength: 3, commutative: true},
-               {name: "EqualMaskedFloat32x16", argLength: 3, commutative: true},
-               {name: "EqualMaskedFloat64x2", argLength: 3, commutative: true},
-               {name: "EqualMaskedFloat64x4", argLength: 3, commutative: true},
-               {name: "EqualMaskedFloat64x8", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt8x16", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt8x32", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt8x64", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt16x8", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt16x16", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt16x32", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "EqualMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint8x16", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint8x32", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint8x64", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint16x32", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "EqualMaskedUint64x8", argLength: 3, commutative: true},
                {name: "EqualUint8x16", argLength: 2, commutative: true},
                {name: "EqualUint8x32", argLength: 2, commutative: true},
                {name: "EqualUint8x64", argLength: 2, commutative: true},
@@ -462,9 +291,6 @@ func simdGenericOps() []opData {
                {name: "FloorFloat32x8", argLength: 1, commutative: false},
                {name: "FloorFloat64x2", argLength: 1, commutative: false},
                {name: "FloorFloat64x4", argLength: 1, commutative: false},
-               {name: "GaloisFieldMulMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "GaloisFieldMulMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "GaloisFieldMulMaskedUint8x64", argLength: 3, commutative: false},
                {name: "GaloisFieldMulUint8x16", argLength: 2, commutative: false},
                {name: "GaloisFieldMulUint8x32", argLength: 2, commutative: false},
                {name: "GaloisFieldMulUint8x64", argLength: 2, commutative: false},
@@ -518,36 +344,6 @@ func simdGenericOps() []opData {
                {name: "GreaterEqualInt16x32", argLength: 2, commutative: false},
                {name: "GreaterEqualInt32x16", argLength: 2, commutative: false},
                {name: "GreaterEqualInt64x8", argLength: 2, commutative: false},
-               {name: "GreaterEqualMaskedFloat32x4", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedFloat32x8", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedFloat32x16", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedFloat64x2", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedFloat64x4", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedFloat64x8", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt8x16", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt8x32", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt8x64", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint8x64", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "GreaterEqualMaskedUint64x8", argLength: 3, commutative: false},
                {name: "GreaterEqualUint8x64", argLength: 2, commutative: false},
                {name: "GreaterEqualUint16x32", argLength: 2, commutative: false},
                {name: "GreaterEqualUint32x16", argLength: 2, commutative: false},
@@ -570,36 +366,6 @@ func simdGenericOps() []opData {
                {name: "GreaterInt64x2", argLength: 2, commutative: false},
                {name: "GreaterInt64x4", argLength: 2, commutative: false},
                {name: "GreaterInt64x8", argLength: 2, commutative: false},
-               {name: "GreaterMaskedFloat32x4", argLength: 3, commutative: false},
-               {name: "GreaterMaskedFloat32x8", argLength: 3, commutative: false},
-               {name: "GreaterMaskedFloat32x16", argLength: 3, commutative: false},
-               {name: "GreaterMaskedFloat64x2", argLength: 3, commutative: false},
-               {name: "GreaterMaskedFloat64x4", argLength: 3, commutative: false},
-               {name: "GreaterMaskedFloat64x8", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt8x16", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt8x32", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt8x64", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "GreaterMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint8x64", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "GreaterMaskedUint64x8", argLength: 3, commutative: false},
                {name: "GreaterUint8x64", argLength: 2, commutative: false},
                {name: "GreaterUint16x32", argLength: 2, commutative: false},
                {name: "GreaterUint32x16", argLength: 2, commutative: false},
@@ -610,12 +376,6 @@ func simdGenericOps() []opData {
                {name: "IsNanFloat64x2", argLength: 2, commutative: true},
                {name: "IsNanFloat64x4", argLength: 2, commutative: true},
                {name: "IsNanFloat64x8", argLength: 2, commutative: true},
-               {name: "IsNanMaskedFloat32x4", argLength: 3, commutative: true},
-               {name: "IsNanMaskedFloat32x8", argLength: 3, commutative: true},
-               {name: "IsNanMaskedFloat32x16", argLength: 3, commutative: true},
-               {name: "IsNanMaskedFloat64x2", argLength: 3, commutative: true},
-               {name: "IsNanMaskedFloat64x4", argLength: 3, commutative: true},
-               {name: "IsNanMaskedFloat64x8", argLength: 3, commutative: true},
                {name: "LessEqualFloat32x4", argLength: 2, commutative: false},
                {name: "LessEqualFloat32x8", argLength: 2, commutative: false},
                {name: "LessEqualFloat32x16", argLength: 2, commutative: false},
@@ -626,36 +386,6 @@ func simdGenericOps() []opData {
                {name: "LessEqualInt16x32", argLength: 2, commutative: false},
                {name: "LessEqualInt32x16", argLength: 2, commutative: false},
                {name: "LessEqualInt64x8", argLength: 2, commutative: false},
-               {name: "LessEqualMaskedFloat32x4", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedFloat32x8", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedFloat32x16", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedFloat64x2", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedFloat64x4", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedFloat64x8", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt8x16", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt8x32", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt8x64", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint8x64", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "LessEqualMaskedUint64x8", argLength: 3, commutative: false},
                {name: "LessEqualUint8x64", argLength: 2, commutative: false},
                {name: "LessEqualUint16x32", argLength: 2, commutative: false},
                {name: "LessEqualUint32x16", argLength: 2, commutative: false},
@@ -670,36 +400,6 @@ func simdGenericOps() []opData {
                {name: "LessInt16x32", argLength: 2, commutative: false},
                {name: "LessInt32x16", argLength: 2, commutative: false},
                {name: "LessInt64x8", argLength: 2, commutative: false},
-               {name: "LessMaskedFloat32x4", argLength: 3, commutative: false},
-               {name: "LessMaskedFloat32x8", argLength: 3, commutative: false},
-               {name: "LessMaskedFloat32x16", argLength: 3, commutative: false},
-               {name: "LessMaskedFloat64x2", argLength: 3, commutative: false},
-               {name: "LessMaskedFloat64x4", argLength: 3, commutative: false},
-               {name: "LessMaskedFloat64x8", argLength: 3, commutative: false},
-               {name: "LessMaskedInt8x16", argLength: 3, commutative: false},
-               {name: "LessMaskedInt8x32", argLength: 3, commutative: false},
-               {name: "LessMaskedInt8x64", argLength: 3, commutative: false},
-               {name: "LessMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "LessMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "LessMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "LessMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "LessMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "LessMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "LessMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "LessMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "LessMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "LessMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "LessMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "LessMaskedUint8x64", argLength: 3, commutative: false},
-               {name: "LessMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "LessMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "LessMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "LessMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "LessMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "LessMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "LessMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "LessMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "LessMaskedUint64x8", argLength: 3, commutative: false},
                {name: "LessUint8x64", argLength: 2, commutative: false},
                {name: "LessUint16x32", argLength: 2, commutative: false},
                {name: "LessUint32x16", argLength: 2, commutative: false},
@@ -722,36 +422,6 @@ func simdGenericOps() []opData {
                {name: "MaxInt64x2", argLength: 2, commutative: true},
                {name: "MaxInt64x4", argLength: 2, commutative: true},
                {name: "MaxInt64x8", argLength: 2, commutative: true},
-               {name: "MaxMaskedFloat32x4", argLength: 3, commutative: true},
-               {name: "MaxMaskedFloat32x8", argLength: 3, commutative: true},
-               {name: "MaxMaskedFloat32x16", argLength: 3, commutative: true},
-               {name: "MaxMaskedFloat64x2", argLength: 3, commutative: true},
-               {name: "MaxMaskedFloat64x4", argLength: 3, commutative: true},
-               {name: "MaxMaskedFloat64x8", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt8x16", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt8x32", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt8x64", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt16x8", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt16x16", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt16x32", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "MaxMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint8x16", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint8x32", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint8x64", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint16x32", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "MaxMaskedUint64x8", argLength: 3, commutative: true},
                {name: "MaxUint8x16", argLength: 2, commutative: true},
                {name: "MaxUint8x32", argLength: 2, commutative: true},
                {name: "MaxUint8x64", argLength: 2, commutative: true},
@@ -782,36 +452,6 @@ func simdGenericOps() []opData {
                {name: "MinInt64x2", argLength: 2, commutative: true},
                {name: "MinInt64x4", argLength: 2, commutative: true},
                {name: "MinInt64x8", argLength: 2, commutative: true},
-               {name: "MinMaskedFloat32x4", argLength: 3, commutative: true},
-               {name: "MinMaskedFloat32x8", argLength: 3, commutative: true},
-               {name: "MinMaskedFloat32x16", argLength: 3, commutative: true},
-               {name: "MinMaskedFloat64x2", argLength: 3, commutative: true},
-               {name: "MinMaskedFloat64x4", argLength: 3, commutative: true},
-               {name: "MinMaskedFloat64x8", argLength: 3, commutative: true},
-               {name: "MinMaskedInt8x16", argLength: 3, commutative: true},
-               {name: "MinMaskedInt8x32", argLength: 3, commutative: true},
-               {name: "MinMaskedInt8x64", argLength: 3, commutative: true},
-               {name: "MinMaskedInt16x8", argLength: 3, commutative: true},
-               {name: "MinMaskedInt16x16", argLength: 3, commutative: true},
-               {name: "MinMaskedInt16x32", argLength: 3, commutative: true},
-               {name: "MinMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "MinMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "MinMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "MinMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "MinMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "MinMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "MinMaskedUint8x16", argLength: 3, commutative: true},
-               {name: "MinMaskedUint8x32", argLength: 3, commutative: true},
-               {name: "MinMaskedUint8x64", argLength: 3, commutative: true},
-               {name: "MinMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "MinMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "MinMaskedUint16x32", argLength: 3, commutative: true},
-               {name: "MinMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "MinMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "MinMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "MinMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "MinMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "MinMaskedUint64x8", argLength: 3, commutative: true},
                {name: "MinUint8x16", argLength: 2, commutative: true},
                {name: "MinUint8x32", argLength: 2, commutative: true},
                {name: "MinUint8x64", argLength: 2, commutative: true},
@@ -830,24 +470,12 @@ func simdGenericOps() []opData {
                {name: "MulAddFloat64x2", argLength: 3, commutative: false},
                {name: "MulAddFloat64x4", argLength: 3, commutative: false},
                {name: "MulAddFloat64x8", argLength: 3, commutative: false},
-               {name: "MulAddMaskedFloat32x4", argLength: 4, commutative: false},
-               {name: "MulAddMaskedFloat32x8", argLength: 4, commutative: false},
-               {name: "MulAddMaskedFloat32x16", argLength: 4, commutative: false},
-               {name: "MulAddMaskedFloat64x2", argLength: 4, commutative: false},
-               {name: "MulAddMaskedFloat64x4", argLength: 4, commutative: false},
-               {name: "MulAddMaskedFloat64x8", argLength: 4, commutative: false},
                {name: "MulAddSubFloat32x4", argLength: 3, commutative: false},
                {name: "MulAddSubFloat32x8", argLength: 3, commutative: false},
                {name: "MulAddSubFloat32x16", argLength: 3, commutative: false},
                {name: "MulAddSubFloat64x2", argLength: 3, commutative: false},
                {name: "MulAddSubFloat64x4", argLength: 3, commutative: false},
                {name: "MulAddSubFloat64x8", argLength: 3, commutative: false},
-               {name: "MulAddSubMaskedFloat32x4", argLength: 4, commutative: false},
-               {name: "MulAddSubMaskedFloat32x8", argLength: 4, commutative: false},
-               {name: "MulAddSubMaskedFloat32x16", argLength: 4, commutative: false},
-               {name: "MulAddSubMaskedFloat64x2", argLength: 4, commutative: false},
-               {name: "MulAddSubMaskedFloat64x4", argLength: 4, commutative: false},
-               {name: "MulAddSubMaskedFloat64x8", argLength: 4, commutative: false},
                {name: "MulEvenWidenInt32x4", argLength: 2, commutative: true},
                {name: "MulEvenWidenInt32x8", argLength: 2, commutative: true},
                {name: "MulEvenWidenUint32x4", argLength: 2, commutative: true},
@@ -861,12 +489,6 @@ func simdGenericOps() []opData {
                {name: "MulHighInt16x8", argLength: 2, commutative: true},
                {name: "MulHighInt16x16", argLength: 2, commutative: true},
                {name: "MulHighInt16x32", argLength: 2, commutative: true},
-               {name: "MulHighMaskedInt16x8", argLength: 3, commutative: true},
-               {name: "MulHighMaskedInt16x16", argLength: 3, commutative: true},
-               {name: "MulHighMaskedInt16x32", argLength: 3, commutative: true},
-               {name: "MulHighMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "MulHighMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "MulHighMaskedUint16x32", argLength: 3, commutative: true},
                {name: "MulHighUint16x8", argLength: 2, commutative: true},
                {name: "MulHighUint16x16", argLength: 2, commutative: true},
                {name: "MulHighUint16x32", argLength: 2, commutative: true},
@@ -879,42 +501,12 @@ func simdGenericOps() []opData {
                {name: "MulInt64x2", argLength: 2, commutative: true},
                {name: "MulInt64x4", argLength: 2, commutative: true},
                {name: "MulInt64x8", argLength: 2, commutative: true},
-               {name: "MulMaskedFloat32x4", argLength: 3, commutative: true},
-               {name: "MulMaskedFloat32x8", argLength: 3, commutative: true},
-               {name: "MulMaskedFloat32x16", argLength: 3, commutative: true},
-               {name: "MulMaskedFloat64x2", argLength: 3, commutative: true},
-               {name: "MulMaskedFloat64x4", argLength: 3, commutative: true},
-               {name: "MulMaskedFloat64x8", argLength: 3, commutative: true},
-               {name: "MulMaskedInt16x8", argLength: 3, commutative: true},
-               {name: "MulMaskedInt16x16", argLength: 3, commutative: true},
-               {name: "MulMaskedInt16x32", argLength: 3, commutative: true},
-               {name: "MulMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "MulMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "MulMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "MulMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "MulMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "MulMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "MulMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "MulMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "MulMaskedUint16x32", argLength: 3, commutative: true},
-               {name: "MulMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "MulMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "MulMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "MulMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "MulMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "MulMaskedUint64x8", argLength: 3, commutative: true},
                {name: "MulSubAddFloat32x4", argLength: 3, commutative: false},
                {name: "MulSubAddFloat32x8", argLength: 3, commutative: false},
                {name: "MulSubAddFloat32x16", argLength: 3, commutative: false},
                {name: "MulSubAddFloat64x2", argLength: 3, commutative: false},
                {name: "MulSubAddFloat64x4", argLength: 3, commutative: false},
                {name: "MulSubAddFloat64x8", argLength: 3, commutative: false},
-               {name: "MulSubAddMaskedFloat32x4", argLength: 4, commutative: false},
-               {name: "MulSubAddMaskedFloat32x8", argLength: 4, commutative: false},
-               {name: "MulSubAddMaskedFloat32x16", argLength: 4, commutative: false},
-               {name: "MulSubAddMaskedFloat64x2", argLength: 4, commutative: false},
-               {name: "MulSubAddMaskedFloat64x4", argLength: 4, commutative: false},
-               {name: "MulSubAddMaskedFloat64x8", argLength: 4, commutative: false},
                {name: "MulUint16x8", argLength: 2, commutative: true},
                {name: "MulUint16x16", argLength: 2, commutative: true},
                {name: "MulUint16x32", argLength: 2, commutative: true},
@@ -934,36 +526,6 @@ func simdGenericOps() []opData {
                {name: "NotEqualInt16x32", argLength: 2, commutative: true},
                {name: "NotEqualInt32x16", argLength: 2, commutative: true},
                {name: "NotEqualInt64x8", argLength: 2, commutative: true},
-               {name: "NotEqualMaskedFloat32x4", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedFloat32x8", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedFloat32x16", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedFloat64x2", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedFloat64x4", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedFloat64x8", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt8x16", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt8x32", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt8x64", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt16x8", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt16x16", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt16x32", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint8x16", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint8x32", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint8x64", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint16x8", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint16x16", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint16x32", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true},
                {name: "NotEqualUint8x64", argLength: 2, commutative: true},
                {name: "NotEqualUint16x32", argLength: 2, commutative: true},
                {name: "NotEqualUint32x16", argLength: 2, commutative: true},
@@ -980,30 +542,6 @@ func simdGenericOps() []opData {
                {name: "OnesCountInt64x2", argLength: 1, commutative: false},
                {name: "OnesCountInt64x4", argLength: 1, commutative: false},
                {name: "OnesCountInt64x8", argLength: 1, commutative: false},
-               {name: "OnesCountMaskedInt8x16", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt8x32", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt8x64", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt16x8", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt16x16", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt16x32", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt32x4", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt32x8", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt32x16", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt64x2", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt64x4", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedInt64x8", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint8x16", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint8x32", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint8x64", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint16x8", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint16x16", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint16x32", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint32x4", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint32x8", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint32x16", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint64x2", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint64x4", argLength: 2, commutative: false},
-               {name: "OnesCountMaskedUint64x8", argLength: 2, commutative: false},
                {name: "OnesCountUint8x16", argLength: 1, commutative: false},
                {name: "OnesCountUint8x32", argLength: 1, commutative: false},
                {name: "OnesCountUint8x64", argLength: 1, commutative: false},
@@ -1028,18 +566,6 @@ func simdGenericOps() []opData {
                {name: "OrInt64x2", argLength: 2, commutative: true},
                {name: "OrInt64x4", argLength: 2, commutative: true},
                {name: "OrInt64x8", argLength: 2, commutative: true},
-               {name: "OrMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "OrMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "OrMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "OrMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "OrMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "OrMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "OrMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "OrMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "OrMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "OrMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "OrMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "OrMaskedUint64x8", argLength: 3, commutative: true},
                {name: "OrUint8x16", argLength: 2, commutative: true},
                {name: "OrUint8x32", argLength: 2, commutative: true},
                {name: "OrUint8x64", argLength: 2, commutative: true},
@@ -1070,36 +596,6 @@ func simdGenericOps() []opData {
                {name: "Permute2Int64x2", argLength: 3, commutative: false},
                {name: "Permute2Int64x4", argLength: 3, commutative: false},
                {name: "Permute2Int64x8", argLength: 3, commutative: false},
-               {name: "Permute2MaskedFloat32x4", argLength: 4, commutative: false},
-               {name: "Permute2MaskedFloat32x8", argLength: 4, commutative: false},
-               {name: "Permute2MaskedFloat32x16", argLength: 4, commutative: false},
-               {name: "Permute2MaskedFloat64x2", argLength: 4, commutative: false},
-               {name: "Permute2MaskedFloat64x4", argLength: 4, commutative: false},
-               {name: "Permute2MaskedFloat64x8", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt8x16", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt8x32", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt8x64", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt16x8", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt16x16", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt16x32", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt32x4", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt32x8", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt32x16", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt64x2", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt64x4", argLength: 4, commutative: false},
-               {name: "Permute2MaskedInt64x8", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint8x16", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint8x32", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint8x64", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint16x8", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint16x16", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint16x32", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint32x4", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint32x8", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint32x16", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint64x2", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint64x4", argLength: 4, commutative: false},
-               {name: "Permute2MaskedUint64x8", argLength: 4, commutative: false},
                {name: "Permute2Uint8x16", argLength: 3, commutative: false},
                {name: "Permute2Uint8x32", argLength: 3, commutative: false},
                {name: "Permute2Uint8x64", argLength: 3, commutative: false},
@@ -1126,30 +622,6 @@ func simdGenericOps() []opData {
                {name: "PermuteInt32x16", argLength: 2, commutative: false},
                {name: "PermuteInt64x4", argLength: 2, commutative: false},
                {name: "PermuteInt64x8", argLength: 2, commutative: false},
-               {name: "PermuteMaskedFloat32x8", argLength: 3, commutative: false},
-               {name: "PermuteMaskedFloat32x16", argLength: 3, commutative: false},
-               {name: "PermuteMaskedFloat64x4", argLength: 3, commutative: false},
-               {name: "PermuteMaskedFloat64x8", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt8x16", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt8x32", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt8x64", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "PermuteMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint8x64", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "PermuteMaskedUint64x8", argLength: 3, commutative: false},
                {name: "PermuteUint8x16", argLength: 2, commutative: false},
                {name: "PermuteUint8x32", argLength: 2, commutative: false},
                {name: "PermuteUint8x64", argLength: 2, commutative: false},
@@ -1166,42 +638,18 @@ func simdGenericOps() []opData {
                {name: "ReciprocalFloat64x2", argLength: 1, commutative: false},
                {name: "ReciprocalFloat64x4", argLength: 1, commutative: false},
                {name: "ReciprocalFloat64x8", argLength: 1, commutative: false},
-               {name: "ReciprocalMaskedFloat32x4", argLength: 2, commutative: false},
-               {name: "ReciprocalMaskedFloat32x8", argLength: 2, commutative: false},
-               {name: "ReciprocalMaskedFloat32x16", argLength: 2, commutative: false},
-               {name: "ReciprocalMaskedFloat64x2", argLength: 2, commutative: false},
-               {name: "ReciprocalMaskedFloat64x4", argLength: 2, commutative: false},
-               {name: "ReciprocalMaskedFloat64x8", argLength: 2, commutative: false},
                {name: "ReciprocalSqrtFloat32x4", argLength: 1, commutative: false},
                {name: "ReciprocalSqrtFloat32x8", argLength: 1, commutative: false},
                {name: "ReciprocalSqrtFloat32x16", argLength: 1, commutative: false},
                {name: "ReciprocalSqrtFloat64x2", argLength: 1, commutative: false},
                {name: "ReciprocalSqrtFloat64x4", argLength: 1, commutative: false},
                {name: "ReciprocalSqrtFloat64x8", argLength: 1, commutative: false},
-               {name: "ReciprocalSqrtMaskedFloat32x4", argLength: 2, commutative: false},
-               {name: "ReciprocalSqrtMaskedFloat32x8", argLength: 2, commutative: false},
-               {name: "ReciprocalSqrtMaskedFloat32x16", argLength: 2, commutative: false},
-               {name: "ReciprocalSqrtMaskedFloat64x2", argLength: 2, commutative: false},
-               {name: "ReciprocalSqrtMaskedFloat64x4", argLength: 2, commutative: false},
-               {name: "ReciprocalSqrtMaskedFloat64x8", argLength: 2, commutative: false},
                {name: "RotateLeftInt32x4", argLength: 2, commutative: false},
                {name: "RotateLeftInt32x8", argLength: 2, commutative: false},
                {name: "RotateLeftInt32x16", argLength: 2, commutative: false},
                {name: "RotateLeftInt64x2", argLength: 2, commutative: false},
                {name: "RotateLeftInt64x4", argLength: 2, commutative: false},
                {name: "RotateLeftInt64x8", argLength: 2, commutative: false},
-               {name: "RotateLeftMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "RotateLeftMaskedUint64x8", argLength: 3, commutative: false},
                {name: "RotateLeftUint32x4", argLength: 2, commutative: false},
                {name: "RotateLeftUint32x8", argLength: 2, commutative: false},
                {name: "RotateLeftUint32x16", argLength: 2, commutative: false},
@@ -1214,18 +662,6 @@ func simdGenericOps() []opData {
                {name: "RotateRightInt64x2", argLength: 2, commutative: false},
                {name: "RotateRightInt64x4", argLength: 2, commutative: false},
                {name: "RotateRightInt64x8", argLength: 2, commutative: false},
-               {name: "RotateRightMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "RotateRightMaskedUint64x8", argLength: 3, commutative: false},
                {name: "RotateRightUint32x4", argLength: 2, commutative: false},
                {name: "RotateRightUint32x8", argLength: 2, commutative: false},
                {name: "RotateRightUint32x16", argLength: 2, commutative: false},
@@ -1242,12 +678,6 @@ func simdGenericOps() []opData {
                {name: "ScaleFloat64x2", argLength: 2, commutative: false},
                {name: "ScaleFloat64x4", argLength: 2, commutative: false},
                {name: "ScaleFloat64x8", argLength: 2, commutative: false},
-               {name: "ScaleMaskedFloat32x4", argLength: 3, commutative: false},
-               {name: "ScaleMaskedFloat32x8", argLength: 3, commutative: false},
-               {name: "ScaleMaskedFloat32x16", argLength: 3, commutative: false},
-               {name: "ScaleMaskedFloat64x2", argLength: 3, commutative: false},
-               {name: "ScaleMaskedFloat64x4", argLength: 3, commutative: false},
-               {name: "ScaleMaskedFloat64x8", argLength: 3, commutative: false},
                {name: "SetHiFloat32x8", argLength: 2, commutative: false},
                {name: "SetHiFloat32x16", argLength: 2, commutative: false},
                {name: "SetHiFloat64x4", argLength: 2, commutative: false},
@@ -1297,24 +727,6 @@ func simdGenericOps() []opData {
                {name: "ShiftAllLeftInt64x2", argLength: 2, commutative: false},
                {name: "ShiftAllLeftInt64x4", argLength: 2, commutative: false},
                {name: "ShiftAllLeftInt64x8", argLength: 2, commutative: false},
-               {name: "ShiftAllLeftMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "ShiftAllLeftMaskedUint64x8", argLength: 3, commutative: false},
                {name: "ShiftAllLeftUint16x8", argLength: 2, commutative: false},
                {name: "ShiftAllLeftUint16x16", argLength: 2, commutative: false},
                {name: "ShiftAllLeftUint16x32", argLength: 2, commutative: false},
@@ -1333,24 +745,6 @@ func simdGenericOps() []opData {
                {name: "ShiftAllRightInt64x2", argLength: 2, commutative: false},
                {name: "ShiftAllRightInt64x4", argLength: 2, commutative: false},
                {name: "ShiftAllRightInt64x8", argLength: 2, commutative: false},
-               {name: "ShiftAllRightMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "ShiftAllRightMaskedUint64x8", argLength: 3, commutative: false},
                {name: "ShiftAllRightUint16x8", argLength: 2, commutative: false},
                {name: "ShiftAllRightUint16x16", argLength: 2, commutative: false},
                {name: "ShiftAllRightUint16x32", argLength: 2, commutative: false},
@@ -1369,24 +763,6 @@ func simdGenericOps() []opData {
                {name: "ShiftLeftConcatInt64x2", argLength: 3, commutative: false},
                {name: "ShiftLeftConcatInt64x4", argLength: 3, commutative: false},
                {name: "ShiftLeftConcatInt64x8", argLength: 3, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt16x8", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt16x16", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt16x32", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt32x4", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt32x8", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt32x16", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt64x2", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt64x4", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedInt64x8", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint16x8", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint16x16", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint16x32", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint32x4", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint32x8", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint32x16", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint64x2", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint64x4", argLength: 4, commutative: false},
-               {name: "ShiftLeftConcatMaskedUint64x8", argLength: 4, commutative: false},
                {name: "ShiftLeftConcatUint16x8", argLength: 3, commutative: false},
                {name: "ShiftLeftConcatUint16x16", argLength: 3, commutative: false},
                {name: "ShiftLeftConcatUint16x32", argLength: 3, commutative: false},
@@ -1405,24 +781,6 @@ func simdGenericOps() []opData {
                {name: "ShiftLeftInt64x2", argLength: 2, commutative: false},
                {name: "ShiftLeftInt64x4", argLength: 2, commutative: false},
                {name: "ShiftLeftInt64x8", argLength: 2, commutative: false},
-               {name: "ShiftLeftMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "ShiftLeftMaskedUint64x8", argLength: 3, commutative: false},
                {name: "ShiftLeftUint16x8", argLength: 2, commutative: false},
                {name: "ShiftLeftUint16x16", argLength: 2, commutative: false},
                {name: "ShiftLeftUint16x32", argLength: 2, commutative: false},
@@ -1441,24 +799,6 @@ func simdGenericOps() []opData {
                {name: "ShiftRightConcatInt64x2", argLength: 3, commutative: false},
                {name: "ShiftRightConcatInt64x4", argLength: 3, commutative: false},
                {name: "ShiftRightConcatInt64x8", argLength: 3, commutative: false},
-               {name: "ShiftRightConcatMaskedInt16x8", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedInt16x16", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedInt16x32", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedInt32x4", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedInt32x8", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedInt32x16", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedInt64x2", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedInt64x4", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedInt64x8", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint16x8", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint16x16", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint16x32", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint32x4", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint32x8", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint32x16", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint64x2", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint64x4", argLength: 4, commutative: false},
-               {name: "ShiftRightConcatMaskedUint64x8", argLength: 4, commutative: false},
                {name: "ShiftRightConcatUint16x8", argLength: 3, commutative: false},
                {name: "ShiftRightConcatUint16x16", argLength: 3, commutative: false},
                {name: "ShiftRightConcatUint16x32", argLength: 3, commutative: false},
@@ -1477,24 +817,6 @@ func simdGenericOps() []opData {
                {name: "ShiftRightInt64x2", argLength: 2, commutative: false},
                {name: "ShiftRightInt64x4", argLength: 2, commutative: false},
                {name: "ShiftRightInt64x8", argLength: 2, commutative: false},
-               {name: "ShiftRightMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "ShiftRightMaskedUint64x8", argLength: 3, commutative: false},
                {name: "ShiftRightUint16x8", argLength: 2, commutative: false},
                {name: "ShiftRightUint16x16", argLength: 2, commutative: false},
                {name: "ShiftRightUint16x32", argLength: 2, commutative: false},
@@ -1510,12 +832,6 @@ func simdGenericOps() []opData {
                {name: "SqrtFloat64x2", argLength: 1, commutative: false},
                {name: "SqrtFloat64x4", argLength: 1, commutative: false},
                {name: "SqrtFloat64x8", argLength: 1, commutative: false},
-               {name: "SqrtMaskedFloat32x4", argLength: 2, commutative: false},
-               {name: "SqrtMaskedFloat32x8", argLength: 2, commutative: false},
-               {name: "SqrtMaskedFloat32x16", argLength: 2, commutative: false},
-               {name: "SqrtMaskedFloat64x2", argLength: 2, commutative: false},
-               {name: "SqrtMaskedFloat64x4", argLength: 2, commutative: false},
-               {name: "SqrtMaskedFloat64x8", argLength: 2, commutative: false},
                {name: "SubFloat32x4", argLength: 2, commutative: false},
                {name: "SubFloat32x8", argLength: 2, commutative: false},
                {name: "SubFloat32x16", argLength: 2, commutative: false},
@@ -1534,36 +850,6 @@ func simdGenericOps() []opData {
                {name: "SubInt64x2", argLength: 2, commutative: false},
                {name: "SubInt64x4", argLength: 2, commutative: false},
                {name: "SubInt64x8", argLength: 2, commutative: false},
-               {name: "SubMaskedFloat32x4", argLength: 3, commutative: false},
-               {name: "SubMaskedFloat32x8", argLength: 3, commutative: false},
-               {name: "SubMaskedFloat32x16", argLength: 3, commutative: false},
-               {name: "SubMaskedFloat64x2", argLength: 3, commutative: false},
-               {name: "SubMaskedFloat64x4", argLength: 3, commutative: false},
-               {name: "SubMaskedFloat64x8", argLength: 3, commutative: false},
-               {name: "SubMaskedInt8x16", argLength: 3, commutative: false},
-               {name: "SubMaskedInt8x32", argLength: 3, commutative: false},
-               {name: "SubMaskedInt8x64", argLength: 3, commutative: false},
-               {name: "SubMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "SubMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "SubMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "SubMaskedInt32x4", argLength: 3, commutative: false},
-               {name: "SubMaskedInt32x8", argLength: 3, commutative: false},
-               {name: "SubMaskedInt32x16", argLength: 3, commutative: false},
-               {name: "SubMaskedInt64x2", argLength: 3, commutative: false},
-               {name: "SubMaskedInt64x4", argLength: 3, commutative: false},
-               {name: "SubMaskedInt64x8", argLength: 3, commutative: false},
-               {name: "SubMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "SubMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "SubMaskedUint8x64", argLength: 3, commutative: false},
-               {name: "SubMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "SubMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "SubMaskedUint16x32", argLength: 3, commutative: false},
-               {name: "SubMaskedUint32x4", argLength: 3, commutative: false},
-               {name: "SubMaskedUint32x8", argLength: 3, commutative: false},
-               {name: "SubMaskedUint32x16", argLength: 3, commutative: false},
-               {name: "SubMaskedUint64x2", argLength: 3, commutative: false},
-               {name: "SubMaskedUint64x4", argLength: 3, commutative: false},
-               {name: "SubMaskedUint64x8", argLength: 3, commutative: false},
                {name: "SubPairsFloat32x4", argLength: 2, commutative: false},
                {name: "SubPairsFloat32x8", argLength: 2, commutative: false},
                {name: "SubPairsFloat64x2", argLength: 2, commutative: false},
@@ -1584,18 +870,6 @@ func simdGenericOps() []opData {
                {name: "SubSaturatedInt16x8", argLength: 2, commutative: false},
                {name: "SubSaturatedInt16x16", argLength: 2, commutative: false},
                {name: "SubSaturatedInt16x32", argLength: 2, commutative: false},
-               {name: "SubSaturatedMaskedInt8x16", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedInt8x32", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedInt8x64", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedInt16x8", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedInt16x16", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedInt16x32", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedUint8x16", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedUint8x32", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedUint8x64", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedUint16x8", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedUint16x16", argLength: 3, commutative: false},
-               {name: "SubSaturatedMaskedUint16x32", argLength: 3, commutative: false},
                {name: "SubSaturatedUint8x16", argLength: 2, commutative: false},
                {name: "SubSaturatedUint8x32", argLength: 2, commutative: false},
                {name: "SubSaturatedUint8x64", argLength: 2, commutative: false},
@@ -1630,18 +904,6 @@ func simdGenericOps() []opData {
                {name: "XorInt64x2", argLength: 2, commutative: true},
                {name: "XorInt64x4", argLength: 2, commutative: true},
                {name: "XorInt64x8", argLength: 2, commutative: true},
-               {name: "XorMaskedInt32x4", argLength: 3, commutative: true},
-               {name: "XorMaskedInt32x8", argLength: 3, commutative: true},
-               {name: "XorMaskedInt32x16", argLength: 3, commutative: true},
-               {name: "XorMaskedInt64x2", argLength: 3, commutative: true},
-               {name: "XorMaskedInt64x4", argLength: 3, commutative: true},
-               {name: "XorMaskedInt64x8", argLength: 3, commutative: true},
-               {name: "XorMaskedUint32x4", argLength: 3, commutative: true},
-               {name: "XorMaskedUint32x8", argLength: 3, commutative: true},
-               {name: "XorMaskedUint32x16", argLength: 3, commutative: true},
-               {name: "XorMaskedUint64x2", argLength: 3, commutative: true},
-               {name: "XorMaskedUint64x4", argLength: 3, commutative: true},
-               {name: "XorMaskedUint64x8", argLength: 3, commutative: true},
                {name: "XorUint8x16", argLength: 2, commutative: true},
                {name: "XorUint8x32", argLength: 2, commutative: true},
                {name: "XorUint8x64", argLength: 2, commutative: true},
@@ -1666,57 +928,27 @@ func simdGenericOps() []opData {
                {name: "CeilScaledFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "CeilScaledFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "CeilScaledFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "CeilScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "CeilScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "CeilScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "CeilScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "CeilScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "CeilScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "CeilScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "FloorScaledFloat32x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledFloat32x8", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledFloat32x16", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "FloorScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "FloorScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "FloorScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "GaloisFieldAffineTransformInverseMaskedUint8x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "GaloisFieldAffineTransformInverseMaskedUint8x32", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "GaloisFieldAffineTransformInverseMaskedUint8x64", argLength: 3, commutative: false, aux: "UInt8"},
                {name: "GaloisFieldAffineTransformInverseUint8x16", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "GaloisFieldAffineTransformInverseUint8x32", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "GaloisFieldAffineTransformInverseUint8x64", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "GaloisFieldAffineTransformMaskedUint8x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "GaloisFieldAffineTransformMaskedUint8x32", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "GaloisFieldAffineTransformMaskedUint8x64", argLength: 3, commutative: false, aux: "UInt8"},
                {name: "GaloisFieldAffineTransformUint8x16", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "GaloisFieldAffineTransformUint8x32", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "GaloisFieldAffineTransformUint8x64", argLength: 2, commutative: false, aux: "UInt8"},
@@ -1736,18 +968,6 @@ func simdGenericOps() []opData {
                {name: "RotateAllLeftInt64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RotateAllLeftInt64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RotateAllLeftInt64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedInt32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedInt32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedInt32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedInt64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedInt64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedInt64x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedUint32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedUint32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedUint32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedUint64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedUint64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllLeftMaskedUint64x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "RotateAllLeftUint32x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RotateAllLeftUint32x8", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RotateAllLeftUint32x16", argLength: 1, commutative: false, aux: "UInt8"},
@@ -1760,18 +980,6 @@ func simdGenericOps() []opData {
                {name: "RotateAllRightInt64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RotateAllRightInt64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RotateAllRightInt64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedInt32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedInt32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedInt32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedInt64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedInt64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedInt64x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedUint32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedUint32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedUint32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedUint64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedUint64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RotateAllRightMaskedUint64x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "RotateAllRightUint32x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RotateAllRightUint32x8", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RotateAllRightUint32x16", argLength: 1, commutative: false, aux: "UInt8"},
@@ -1784,24 +992,12 @@ func simdGenericOps() []opData {
                {name: "RoundToEvenScaledFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RoundToEvenScaledFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RoundToEvenScaledFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "RoundToEvenScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RoundToEvenScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RoundToEvenScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RoundToEvenScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RoundToEvenScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "RoundToEvenScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "RoundToEvenScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "SetElemFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "SetElemFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "UInt8"},
@@ -1821,24 +1017,6 @@ func simdGenericOps() []opData {
                {name: "ShiftAllLeftConcatInt64x2", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "ShiftAllLeftConcatInt64x4", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "ShiftAllLeftConcatInt64x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt16x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt16x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt16x32", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt32x4", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt32x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt32x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt64x2", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt64x4", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedInt64x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint16x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint16x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint16x32", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint32x4", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint32x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint32x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint64x2", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint64x4", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllLeftConcatMaskedUint64x8", argLength: 3, commutative: false, aux: "UInt8"},
                {name: "ShiftAllLeftConcatUint16x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "ShiftAllLeftConcatUint16x16", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "ShiftAllLeftConcatUint16x32", argLength: 2, commutative: false, aux: "UInt8"},
@@ -1857,24 +1035,6 @@ func simdGenericOps() []opData {
                {name: "ShiftAllRightConcatInt64x2", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "ShiftAllRightConcatInt64x4", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "ShiftAllRightConcatInt64x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt16x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt16x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt16x32", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt32x4", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt32x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt32x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt64x2", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt64x4", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedInt64x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint16x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint16x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint16x32", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint32x4", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint32x8", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint32x16", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint64x2", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint64x4", argLength: 3, commutative: false, aux: "UInt8"},
-               {name: "ShiftAllRightConcatMaskedUint64x8", argLength: 3, commutative: false, aux: "UInt8"},
                {name: "ShiftAllRightConcatUint16x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "ShiftAllRightConcatUint16x16", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "ShiftAllRightConcatUint16x32", argLength: 2, commutative: false, aux: "UInt8"},
@@ -1890,23 +1050,11 @@ func simdGenericOps() []opData {
                {name: "TruncScaledFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "TruncScaledFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "TruncScaledFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "TruncScaledResidueFloat32x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "TruncScaledResidueFloat32x8", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "TruncScaledResidueFloat32x16", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "TruncScaledResidueFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "TruncScaledResidueFloat64x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "TruncScaledResidueFloat64x8", argLength: 1, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledResidueMaskedFloat32x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledResidueMaskedFloat32x8", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledResidueMaskedFloat32x16", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "UInt8"},
-               {name: "TruncScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "UInt8"},
        }
 }
index b45cccd96bbb22238a6f75cf1da6ee9e23eadf47..9f6e10c95cbb99dc3389d86e379e49f7e5bcdeec 100644 (file)
@@ -4648,36 +4648,15 @@ const (
        OpAbsInt64x2
        OpAbsInt64x4
        OpAbsInt64x8
-       OpAbsMaskedInt8x16
-       OpAbsMaskedInt8x32
-       OpAbsMaskedInt8x64
-       OpAbsMaskedInt16x8
-       OpAbsMaskedInt16x16
-       OpAbsMaskedInt16x32
-       OpAbsMaskedInt32x4
-       OpAbsMaskedInt32x8
-       OpAbsMaskedInt32x16
-       OpAbsMaskedInt64x2
-       OpAbsMaskedInt64x4
-       OpAbsMaskedInt64x8
        OpAddDotProdPairsSaturatedInt32x4
        OpAddDotProdPairsSaturatedInt32x8
        OpAddDotProdPairsSaturatedInt32x16
-       OpAddDotProdPairsSaturatedMaskedInt32x4
-       OpAddDotProdPairsSaturatedMaskedInt32x8
-       OpAddDotProdPairsSaturatedMaskedInt32x16
        OpAddDotProdQuadrupleInt32x4
        OpAddDotProdQuadrupleInt32x8
        OpAddDotProdQuadrupleInt32x16
-       OpAddDotProdQuadrupleMaskedInt32x4
-       OpAddDotProdQuadrupleMaskedInt32x8
-       OpAddDotProdQuadrupleMaskedInt32x16
        OpAddDotProdQuadrupleSaturatedInt32x4
        OpAddDotProdQuadrupleSaturatedInt32x8
        OpAddDotProdQuadrupleSaturatedInt32x16
-       OpAddDotProdQuadrupleSaturatedMaskedInt32x4
-       OpAddDotProdQuadrupleSaturatedMaskedInt32x8
-       OpAddDotProdQuadrupleSaturatedMaskedInt32x16
        OpAddFloat32x4
        OpAddFloat32x8
        OpAddFloat32x16
@@ -4696,36 +4675,6 @@ const (
        OpAddInt64x2
        OpAddInt64x4
        OpAddInt64x8
-       OpAddMaskedFloat32x4
-       OpAddMaskedFloat32x8
-       OpAddMaskedFloat32x16
-       OpAddMaskedFloat64x2
-       OpAddMaskedFloat64x4
-       OpAddMaskedFloat64x8
-       OpAddMaskedInt8x16
-       OpAddMaskedInt8x32
-       OpAddMaskedInt8x64
-       OpAddMaskedInt16x8
-       OpAddMaskedInt16x16
-       OpAddMaskedInt16x32
-       OpAddMaskedInt32x4
-       OpAddMaskedInt32x8
-       OpAddMaskedInt32x16
-       OpAddMaskedInt64x2
-       OpAddMaskedInt64x4
-       OpAddMaskedInt64x8
-       OpAddMaskedUint8x16
-       OpAddMaskedUint8x32
-       OpAddMaskedUint8x64
-       OpAddMaskedUint16x8
-       OpAddMaskedUint16x16
-       OpAddMaskedUint16x32
-       OpAddMaskedUint32x4
-       OpAddMaskedUint32x8
-       OpAddMaskedUint32x16
-       OpAddMaskedUint64x2
-       OpAddMaskedUint64x4
-       OpAddMaskedUint64x8
        OpAddPairsFloat32x4
        OpAddPairsFloat32x8
        OpAddPairsFloat64x2
@@ -4746,18 +4695,6 @@ const (
        OpAddSaturatedInt16x8
        OpAddSaturatedInt16x16
        OpAddSaturatedInt16x32
-       OpAddSaturatedMaskedInt8x16
-       OpAddSaturatedMaskedInt8x32
-       OpAddSaturatedMaskedInt8x64
-       OpAddSaturatedMaskedInt16x8
-       OpAddSaturatedMaskedInt16x16
-       OpAddSaturatedMaskedInt16x32
-       OpAddSaturatedMaskedUint8x16
-       OpAddSaturatedMaskedUint8x32
-       OpAddSaturatedMaskedUint8x64
-       OpAddSaturatedMaskedUint16x8
-       OpAddSaturatedMaskedUint16x16
-       OpAddSaturatedMaskedUint16x32
        OpAddSaturatedUint8x16
        OpAddSaturatedUint8x32
        OpAddSaturatedUint8x64
@@ -4792,18 +4729,6 @@ const (
        OpAndInt64x2
        OpAndInt64x4
        OpAndInt64x8
-       OpAndMaskedInt32x4
-       OpAndMaskedInt32x8
-       OpAndMaskedInt32x16
-       OpAndMaskedInt64x2
-       OpAndMaskedInt64x4
-       OpAndMaskedInt64x8
-       OpAndMaskedUint32x4
-       OpAndMaskedUint32x8
-       OpAndMaskedUint32x16
-       OpAndMaskedUint64x2
-       OpAndMaskedUint64x4
-       OpAndMaskedUint64x8
        OpAndNotInt8x16
        OpAndNotInt8x32
        OpAndNotInt8x64
@@ -4816,18 +4741,6 @@ const (
        OpAndNotInt64x2
        OpAndNotInt64x4
        OpAndNotInt64x8
-       OpAndNotMaskedInt32x4
-       OpAndNotMaskedInt32x8
-       OpAndNotMaskedInt32x16
-       OpAndNotMaskedInt64x2
-       OpAndNotMaskedInt64x4
-       OpAndNotMaskedInt64x8
-       OpAndNotMaskedUint32x4
-       OpAndNotMaskedUint32x8
-       OpAndNotMaskedUint32x16
-       OpAndNotMaskedUint64x2
-       OpAndNotMaskedUint64x4
-       OpAndNotMaskedUint64x8
        OpAndNotUint8x16
        OpAndNotUint8x32
        OpAndNotUint8x64
@@ -4852,12 +4765,6 @@ const (
        OpAndUint64x2
        OpAndUint64x4
        OpAndUint64x8
-       OpAverageMaskedUint8x16
-       OpAverageMaskedUint8x32
-       OpAverageMaskedUint8x64
-       OpAverageMaskedUint16x8
-       OpAverageMaskedUint16x16
-       OpAverageMaskedUint16x32
        OpAverageUint8x16
        OpAverageUint8x32
        OpAverageUint8x64
@@ -4870,16 +4777,6 @@ const (
        OpBroadcast128Int16x8
        OpBroadcast128Int32x4
        OpBroadcast128Int64x2
-       OpBroadcast128MaskedFloat32x4
-       OpBroadcast128MaskedFloat64x2
-       OpBroadcast128MaskedInt8x16
-       OpBroadcast128MaskedInt16x8
-       OpBroadcast128MaskedInt32x4
-       OpBroadcast128MaskedInt64x2
-       OpBroadcast128MaskedUint8x16
-       OpBroadcast128MaskedUint16x8
-       OpBroadcast128MaskedUint32x4
-       OpBroadcast128MaskedUint64x2
        OpBroadcast128Uint8x16
        OpBroadcast128Uint16x8
        OpBroadcast128Uint32x4
@@ -4890,16 +4787,6 @@ const (
        OpBroadcast256Int16x8
        OpBroadcast256Int32x4
        OpBroadcast256Int64x2
-       OpBroadcast256MaskedFloat32x4
-       OpBroadcast256MaskedFloat64x2
-       OpBroadcast256MaskedInt8x16
-       OpBroadcast256MaskedInt16x8
-       OpBroadcast256MaskedInt32x4
-       OpBroadcast256MaskedInt64x2
-       OpBroadcast256MaskedUint8x16
-       OpBroadcast256MaskedUint16x8
-       OpBroadcast256MaskedUint32x4
-       OpBroadcast256MaskedUint64x2
        OpBroadcast256Uint8x16
        OpBroadcast256Uint16x8
        OpBroadcast256Uint32x4
@@ -4910,16 +4797,6 @@ const (
        OpBroadcast512Int16x8
        OpBroadcast512Int32x4
        OpBroadcast512Int64x2
-       OpBroadcast512MaskedFloat32x4
-       OpBroadcast512MaskedFloat64x2
-       OpBroadcast512MaskedInt8x16
-       OpBroadcast512MaskedInt16x8
-       OpBroadcast512MaskedInt32x4
-       OpBroadcast512MaskedInt64x2
-       OpBroadcast512MaskedUint8x16
-       OpBroadcast512MaskedUint16x8
-       OpBroadcast512MaskedUint32x4
-       OpBroadcast512MaskedUint64x2
        OpBroadcast512Uint8x16
        OpBroadcast512Uint16x8
        OpBroadcast512Uint32x4
@@ -4961,15 +4838,9 @@ const (
        OpConvertToInt32Float32x4
        OpConvertToInt32Float32x8
        OpConvertToInt32Float32x16
-       OpConvertToInt32MaskedFloat32x4
-       OpConvertToInt32MaskedFloat32x8
-       OpConvertToInt32MaskedFloat32x16
        OpConvertToUint32Float32x4
        OpConvertToUint32Float32x8
        OpConvertToUint32Float32x16
-       OpConvertToUint32MaskedFloat32x4
-       OpConvertToUint32MaskedFloat32x8
-       OpConvertToUint32MaskedFloat32x16
        OpCopySignInt8x16
        OpCopySignInt8x32
        OpCopySignInt16x8
@@ -4982,21 +4853,9 @@ const (
        OpDivFloat64x2
        OpDivFloat64x4
        OpDivFloat64x8
-       OpDivMaskedFloat32x4
-       OpDivMaskedFloat32x8
-       OpDivMaskedFloat32x16
-       OpDivMaskedFloat64x2
-       OpDivMaskedFloat64x4
-       OpDivMaskedFloat64x8
        OpDotProdPairsInt16x8
        OpDotProdPairsInt16x16
        OpDotProdPairsInt16x32
-       OpDotProdPairsMaskedInt16x8
-       OpDotProdPairsMaskedInt16x16
-       OpDotProdPairsMaskedInt16x32
-       OpDotProdPairsSaturatedMaskedUint8x16
-       OpDotProdPairsSaturatedMaskedUint8x32
-       OpDotProdPairsSaturatedMaskedUint8x64
        OpDotProdPairsSaturatedUint8x16
        OpDotProdPairsSaturatedUint8x32
        OpDotProdPairsSaturatedUint8x64
@@ -5018,36 +4877,6 @@ const (
        OpEqualInt64x2
        OpEqualInt64x4
        OpEqualInt64x8
-       OpEqualMaskedFloat32x4
-       OpEqualMaskedFloat32x8
-       OpEqualMaskedFloat32x16
-       OpEqualMaskedFloat64x2
-       OpEqualMaskedFloat64x4
-       OpEqualMaskedFloat64x8
-       OpEqualMaskedInt8x16
-       OpEqualMaskedInt8x32
-       OpEqualMaskedInt8x64
-       OpEqualMaskedInt16x8
-       OpEqualMaskedInt16x16
-       OpEqualMaskedInt16x32
-       OpEqualMaskedInt32x4
-       OpEqualMaskedInt32x8
-       OpEqualMaskedInt32x16
-       OpEqualMaskedInt64x2
-       OpEqualMaskedInt64x4
-       OpEqualMaskedInt64x8
-       OpEqualMaskedUint8x16
-       OpEqualMaskedUint8x32
-       OpEqualMaskedUint8x64
-       OpEqualMaskedUint16x8
-       OpEqualMaskedUint16x16
-       OpEqualMaskedUint16x32
-       OpEqualMaskedUint32x4
-       OpEqualMaskedUint32x8
-       OpEqualMaskedUint32x16
-       OpEqualMaskedUint64x2
-       OpEqualMaskedUint64x4
-       OpEqualMaskedUint64x8
        OpEqualUint8x16
        OpEqualUint8x32
        OpEqualUint8x64
@@ -5094,9 +4923,6 @@ const (
        OpFloorFloat32x8
        OpFloorFloat64x2
        OpFloorFloat64x4
-       OpGaloisFieldMulMaskedUint8x16
-       OpGaloisFieldMulMaskedUint8x32
-       OpGaloisFieldMulMaskedUint8x64
        OpGaloisFieldMulUint8x16
        OpGaloisFieldMulUint8x32
        OpGaloisFieldMulUint8x64
@@ -5150,36 +4976,6 @@ const (
        OpGreaterEqualInt16x32
        OpGreaterEqualInt32x16
        OpGreaterEqualInt64x8
-       OpGreaterEqualMaskedFloat32x4
-       OpGreaterEqualMaskedFloat32x8
-       OpGreaterEqualMaskedFloat32x16
-       OpGreaterEqualMaskedFloat64x2
-       OpGreaterEqualMaskedFloat64x4
-       OpGreaterEqualMaskedFloat64x8
-       OpGreaterEqualMaskedInt8x16
-       OpGreaterEqualMaskedInt8x32
-       OpGreaterEqualMaskedInt8x64
-       OpGreaterEqualMaskedInt16x8
-       OpGreaterEqualMaskedInt16x16
-       OpGreaterEqualMaskedInt16x32
-       OpGreaterEqualMaskedInt32x4
-       OpGreaterEqualMaskedInt32x8
-       OpGreaterEqualMaskedInt32x16
-       OpGreaterEqualMaskedInt64x2
-       OpGreaterEqualMaskedInt64x4
-       OpGreaterEqualMaskedInt64x8
-       OpGreaterEqualMaskedUint8x16
-       OpGreaterEqualMaskedUint8x32
-       OpGreaterEqualMaskedUint8x64
-       OpGreaterEqualMaskedUint16x8
-       OpGreaterEqualMaskedUint16x16
-       OpGreaterEqualMaskedUint16x32
-       OpGreaterEqualMaskedUint32x4
-       OpGreaterEqualMaskedUint32x8
-       OpGreaterEqualMaskedUint32x16
-       OpGreaterEqualMaskedUint64x2
-       OpGreaterEqualMaskedUint64x4
-       OpGreaterEqualMaskedUint64x8
        OpGreaterEqualUint8x64
        OpGreaterEqualUint16x32
        OpGreaterEqualUint32x16
@@ -5202,36 +4998,6 @@ const (
        OpGreaterInt64x2
        OpGreaterInt64x4
        OpGreaterInt64x8
-       OpGreaterMaskedFloat32x4
-       OpGreaterMaskedFloat32x8
-       OpGreaterMaskedFloat32x16
-       OpGreaterMaskedFloat64x2
-       OpGreaterMaskedFloat64x4
-       OpGreaterMaskedFloat64x8
-       OpGreaterMaskedInt8x16
-       OpGreaterMaskedInt8x32
-       OpGreaterMaskedInt8x64
-       OpGreaterMaskedInt16x8
-       OpGreaterMaskedInt16x16
-       OpGreaterMaskedInt16x32
-       OpGreaterMaskedInt32x4
-       OpGreaterMaskedInt32x8
-       OpGreaterMaskedInt32x16
-       OpGreaterMaskedInt64x2
-       OpGreaterMaskedInt64x4
-       OpGreaterMaskedInt64x8
-       OpGreaterMaskedUint8x16
-       OpGreaterMaskedUint8x32
-       OpGreaterMaskedUint8x64
-       OpGreaterMaskedUint16x8
-       OpGreaterMaskedUint16x16
-       OpGreaterMaskedUint16x32
-       OpGreaterMaskedUint32x4
-       OpGreaterMaskedUint32x8
-       OpGreaterMaskedUint32x16
-       OpGreaterMaskedUint64x2
-       OpGreaterMaskedUint64x4
-       OpGreaterMaskedUint64x8
        OpGreaterUint8x64
        OpGreaterUint16x32
        OpGreaterUint32x16
@@ -5242,12 +5008,6 @@ const (
        OpIsNanFloat64x2
        OpIsNanFloat64x4
        OpIsNanFloat64x8
-       OpIsNanMaskedFloat32x4
-       OpIsNanMaskedFloat32x8
-       OpIsNanMaskedFloat32x16
-       OpIsNanMaskedFloat64x2
-       OpIsNanMaskedFloat64x4
-       OpIsNanMaskedFloat64x8
        OpLessEqualFloat32x4
        OpLessEqualFloat32x8
        OpLessEqualFloat32x16
@@ -5258,36 +5018,6 @@ const (
        OpLessEqualInt16x32
        OpLessEqualInt32x16
        OpLessEqualInt64x8
-       OpLessEqualMaskedFloat32x4
-       OpLessEqualMaskedFloat32x8
-       OpLessEqualMaskedFloat32x16
-       OpLessEqualMaskedFloat64x2
-       OpLessEqualMaskedFloat64x4
-       OpLessEqualMaskedFloat64x8
-       OpLessEqualMaskedInt8x16
-       OpLessEqualMaskedInt8x32
-       OpLessEqualMaskedInt8x64
-       OpLessEqualMaskedInt16x8
-       OpLessEqualMaskedInt16x16
-       OpLessEqualMaskedInt16x32
-       OpLessEqualMaskedInt32x4
-       OpLessEqualMaskedInt32x8
-       OpLessEqualMaskedInt32x16
-       OpLessEqualMaskedInt64x2
-       OpLessEqualMaskedInt64x4
-       OpLessEqualMaskedInt64x8
-       OpLessEqualMaskedUint8x16
-       OpLessEqualMaskedUint8x32
-       OpLessEqualMaskedUint8x64
-       OpLessEqualMaskedUint16x8
-       OpLessEqualMaskedUint16x16
-       OpLessEqualMaskedUint16x32
-       OpLessEqualMaskedUint32x4
-       OpLessEqualMaskedUint32x8
-       OpLessEqualMaskedUint32x16
-       OpLessEqualMaskedUint64x2
-       OpLessEqualMaskedUint64x4
-       OpLessEqualMaskedUint64x8
        OpLessEqualUint8x64
        OpLessEqualUint16x32
        OpLessEqualUint32x16
@@ -5302,36 +5032,6 @@ const (
        OpLessInt16x32
        OpLessInt32x16
        OpLessInt64x8
-       OpLessMaskedFloat32x4
-       OpLessMaskedFloat32x8
-       OpLessMaskedFloat32x16
-       OpLessMaskedFloat64x2
-       OpLessMaskedFloat64x4
-       OpLessMaskedFloat64x8
-       OpLessMaskedInt8x16
-       OpLessMaskedInt8x32
-       OpLessMaskedInt8x64
-       OpLessMaskedInt16x8
-       OpLessMaskedInt16x16
-       OpLessMaskedInt16x32
-       OpLessMaskedInt32x4
-       OpLessMaskedInt32x8
-       OpLessMaskedInt32x16
-       OpLessMaskedInt64x2
-       OpLessMaskedInt64x4
-       OpLessMaskedInt64x8
-       OpLessMaskedUint8x16
-       OpLessMaskedUint8x32
-       OpLessMaskedUint8x64
-       OpLessMaskedUint16x8
-       OpLessMaskedUint16x16
-       OpLessMaskedUint16x32
-       OpLessMaskedUint32x4
-       OpLessMaskedUint32x8
-       OpLessMaskedUint32x16
-       OpLessMaskedUint64x2
-       OpLessMaskedUint64x4
-       OpLessMaskedUint64x8
        OpLessUint8x64
        OpLessUint16x32
        OpLessUint32x16
@@ -5354,36 +5054,6 @@ const (
        OpMaxInt64x2
        OpMaxInt64x4
        OpMaxInt64x8
-       OpMaxMaskedFloat32x4
-       OpMaxMaskedFloat32x8
-       OpMaxMaskedFloat32x16
-       OpMaxMaskedFloat64x2
-       OpMaxMaskedFloat64x4
-       OpMaxMaskedFloat64x8
-       OpMaxMaskedInt8x16
-       OpMaxMaskedInt8x32
-       OpMaxMaskedInt8x64
-       OpMaxMaskedInt16x8
-       OpMaxMaskedInt16x16
-       OpMaxMaskedInt16x32
-       OpMaxMaskedInt32x4
-       OpMaxMaskedInt32x8
-       OpMaxMaskedInt32x16
-       OpMaxMaskedInt64x2
-       OpMaxMaskedInt64x4
-       OpMaxMaskedInt64x8
-       OpMaxMaskedUint8x16
-       OpMaxMaskedUint8x32
-       OpMaxMaskedUint8x64
-       OpMaxMaskedUint16x8
-       OpMaxMaskedUint16x16
-       OpMaxMaskedUint16x32
-       OpMaxMaskedUint32x4
-       OpMaxMaskedUint32x8
-       OpMaxMaskedUint32x16
-       OpMaxMaskedUint64x2
-       OpMaxMaskedUint64x4
-       OpMaxMaskedUint64x8
        OpMaxUint8x16
        OpMaxUint8x32
        OpMaxUint8x64
@@ -5414,36 +5084,6 @@ const (
        OpMinInt64x2
        OpMinInt64x4
        OpMinInt64x8
-       OpMinMaskedFloat32x4
-       OpMinMaskedFloat32x8
-       OpMinMaskedFloat32x16
-       OpMinMaskedFloat64x2
-       OpMinMaskedFloat64x4
-       OpMinMaskedFloat64x8
-       OpMinMaskedInt8x16
-       OpMinMaskedInt8x32
-       OpMinMaskedInt8x64
-       OpMinMaskedInt16x8
-       OpMinMaskedInt16x16
-       OpMinMaskedInt16x32
-       OpMinMaskedInt32x4
-       OpMinMaskedInt32x8
-       OpMinMaskedInt32x16
-       OpMinMaskedInt64x2
-       OpMinMaskedInt64x4
-       OpMinMaskedInt64x8
-       OpMinMaskedUint8x16
-       OpMinMaskedUint8x32
-       OpMinMaskedUint8x64
-       OpMinMaskedUint16x8
-       OpMinMaskedUint16x16
-       OpMinMaskedUint16x32
-       OpMinMaskedUint32x4
-       OpMinMaskedUint32x8
-       OpMinMaskedUint32x16
-       OpMinMaskedUint64x2
-       OpMinMaskedUint64x4
-       OpMinMaskedUint64x8
        OpMinUint8x16
        OpMinUint8x32
        OpMinUint8x64
@@ -5462,24 +5102,12 @@ const (
        OpMulAddFloat64x2
        OpMulAddFloat64x4
        OpMulAddFloat64x8
-       OpMulAddMaskedFloat32x4
-       OpMulAddMaskedFloat32x8
-       OpMulAddMaskedFloat32x16
-       OpMulAddMaskedFloat64x2
-       OpMulAddMaskedFloat64x4
-       OpMulAddMaskedFloat64x8
        OpMulAddSubFloat32x4
        OpMulAddSubFloat32x8
        OpMulAddSubFloat32x16
        OpMulAddSubFloat64x2
        OpMulAddSubFloat64x4
        OpMulAddSubFloat64x8
-       OpMulAddSubMaskedFloat32x4
-       OpMulAddSubMaskedFloat32x8
-       OpMulAddSubMaskedFloat32x16
-       OpMulAddSubMaskedFloat64x2
-       OpMulAddSubMaskedFloat64x4
-       OpMulAddSubMaskedFloat64x8
        OpMulEvenWidenInt32x4
        OpMulEvenWidenInt32x8
        OpMulEvenWidenUint32x4
@@ -5493,12 +5121,6 @@ const (
        OpMulHighInt16x8
        OpMulHighInt16x16
        OpMulHighInt16x32
-       OpMulHighMaskedInt16x8
-       OpMulHighMaskedInt16x16
-       OpMulHighMaskedInt16x32
-       OpMulHighMaskedUint16x8
-       OpMulHighMaskedUint16x16
-       OpMulHighMaskedUint16x32
        OpMulHighUint16x8
        OpMulHighUint16x16
        OpMulHighUint16x32
@@ -5511,42 +5133,12 @@ const (
        OpMulInt64x2
        OpMulInt64x4
        OpMulInt64x8
-       OpMulMaskedFloat32x4
-       OpMulMaskedFloat32x8
-       OpMulMaskedFloat32x16
-       OpMulMaskedFloat64x2
-       OpMulMaskedFloat64x4
-       OpMulMaskedFloat64x8
-       OpMulMaskedInt16x8
-       OpMulMaskedInt16x16
-       OpMulMaskedInt16x32
-       OpMulMaskedInt32x4
-       OpMulMaskedInt32x8
-       OpMulMaskedInt32x16
-       OpMulMaskedInt64x2
-       OpMulMaskedInt64x4
-       OpMulMaskedInt64x8
-       OpMulMaskedUint16x8
-       OpMulMaskedUint16x16
-       OpMulMaskedUint16x32
-       OpMulMaskedUint32x4
-       OpMulMaskedUint32x8
-       OpMulMaskedUint32x16
-       OpMulMaskedUint64x2
-       OpMulMaskedUint64x4
-       OpMulMaskedUint64x8
        OpMulSubAddFloat32x4
        OpMulSubAddFloat32x8
        OpMulSubAddFloat32x16
        OpMulSubAddFloat64x2
        OpMulSubAddFloat64x4
        OpMulSubAddFloat64x8
-       OpMulSubAddMaskedFloat32x4
-       OpMulSubAddMaskedFloat32x8
-       OpMulSubAddMaskedFloat32x16
-       OpMulSubAddMaskedFloat64x2
-       OpMulSubAddMaskedFloat64x4
-       OpMulSubAddMaskedFloat64x8
        OpMulUint16x8
        OpMulUint16x16
        OpMulUint16x32
@@ -5566,36 +5158,6 @@ const (
        OpNotEqualInt16x32
        OpNotEqualInt32x16
        OpNotEqualInt64x8
-       OpNotEqualMaskedFloat32x4
-       OpNotEqualMaskedFloat32x8
-       OpNotEqualMaskedFloat32x16
-       OpNotEqualMaskedFloat64x2
-       OpNotEqualMaskedFloat64x4
-       OpNotEqualMaskedFloat64x8
-       OpNotEqualMaskedInt8x16
-       OpNotEqualMaskedInt8x32
-       OpNotEqualMaskedInt8x64
-       OpNotEqualMaskedInt16x8
-       OpNotEqualMaskedInt16x16
-       OpNotEqualMaskedInt16x32
-       OpNotEqualMaskedInt32x4
-       OpNotEqualMaskedInt32x8
-       OpNotEqualMaskedInt32x16
-       OpNotEqualMaskedInt64x2
-       OpNotEqualMaskedInt64x4
-       OpNotEqualMaskedInt64x8
-       OpNotEqualMaskedUint8x16
-       OpNotEqualMaskedUint8x32
-       OpNotEqualMaskedUint8x64
-       OpNotEqualMaskedUint16x8
-       OpNotEqualMaskedUint16x16
-       OpNotEqualMaskedUint16x32
-       OpNotEqualMaskedUint32x4
-       OpNotEqualMaskedUint32x8
-       OpNotEqualMaskedUint32x16
-       OpNotEqualMaskedUint64x2
-       OpNotEqualMaskedUint64x4
-       OpNotEqualMaskedUint64x8
        OpNotEqualUint8x64
        OpNotEqualUint16x32
        OpNotEqualUint32x16
@@ -5612,30 +5174,6 @@ const (
        OpOnesCountInt64x2
        OpOnesCountInt64x4
        OpOnesCountInt64x8
-       OpOnesCountMaskedInt8x16
-       OpOnesCountMaskedInt8x32
-       OpOnesCountMaskedInt8x64
-       OpOnesCountMaskedInt16x8
-       OpOnesCountMaskedInt16x16
-       OpOnesCountMaskedInt16x32
-       OpOnesCountMaskedInt32x4
-       OpOnesCountMaskedInt32x8
-       OpOnesCountMaskedInt32x16
-       OpOnesCountMaskedInt64x2
-       OpOnesCountMaskedInt64x4
-       OpOnesCountMaskedInt64x8
-       OpOnesCountMaskedUint8x16
-       OpOnesCountMaskedUint8x32
-       OpOnesCountMaskedUint8x64
-       OpOnesCountMaskedUint16x8
-       OpOnesCountMaskedUint16x16
-       OpOnesCountMaskedUint16x32
-       OpOnesCountMaskedUint32x4
-       OpOnesCountMaskedUint32x8
-       OpOnesCountMaskedUint32x16
-       OpOnesCountMaskedUint64x2
-       OpOnesCountMaskedUint64x4
-       OpOnesCountMaskedUint64x8
        OpOnesCountUint8x16
        OpOnesCountUint8x32
        OpOnesCountUint8x64
@@ -5660,18 +5198,6 @@ const (
        OpOrInt64x2
        OpOrInt64x4
        OpOrInt64x8
-       OpOrMaskedInt32x4
-       OpOrMaskedInt32x8
-       OpOrMaskedInt32x16
-       OpOrMaskedInt64x2
-       OpOrMaskedInt64x4
-       OpOrMaskedInt64x8
-       OpOrMaskedUint32x4
-       OpOrMaskedUint32x8
-       OpOrMaskedUint32x16
-       OpOrMaskedUint64x2
-       OpOrMaskedUint64x4
-       OpOrMaskedUint64x8
        OpOrUint8x16
        OpOrUint8x32
        OpOrUint8x64
@@ -5702,36 +5228,6 @@ const (
        OpPermute2Int64x2
        OpPermute2Int64x4
        OpPermute2Int64x8
-       OpPermute2MaskedFloat32x4
-       OpPermute2MaskedFloat32x8
-       OpPermute2MaskedFloat32x16
-       OpPermute2MaskedFloat64x2
-       OpPermute2MaskedFloat64x4
-       OpPermute2MaskedFloat64x8
-       OpPermute2MaskedInt8x16
-       OpPermute2MaskedInt8x32
-       OpPermute2MaskedInt8x64
-       OpPermute2MaskedInt16x8
-       OpPermute2MaskedInt16x16
-       OpPermute2MaskedInt16x32
-       OpPermute2MaskedInt32x4
-       OpPermute2MaskedInt32x8
-       OpPermute2MaskedInt32x16
-       OpPermute2MaskedInt64x2
-       OpPermute2MaskedInt64x4
-       OpPermute2MaskedInt64x8
-       OpPermute2MaskedUint8x16
-       OpPermute2MaskedUint8x32
-       OpPermute2MaskedUint8x64
-       OpPermute2MaskedUint16x8
-       OpPermute2MaskedUint16x16
-       OpPermute2MaskedUint16x32
-       OpPermute2MaskedUint32x4
-       OpPermute2MaskedUint32x8
-       OpPermute2MaskedUint32x16
-       OpPermute2MaskedUint64x2
-       OpPermute2MaskedUint64x4
-       OpPermute2MaskedUint64x8
        OpPermute2Uint8x16
        OpPermute2Uint8x32
        OpPermute2Uint8x64
@@ -5758,30 +5254,6 @@ const (
        OpPermuteInt32x16
        OpPermuteInt64x4
        OpPermuteInt64x8
-       OpPermuteMaskedFloat32x8
-       OpPermuteMaskedFloat32x16
-       OpPermuteMaskedFloat64x4
-       OpPermuteMaskedFloat64x8
-       OpPermuteMaskedInt8x16
-       OpPermuteMaskedInt8x32
-       OpPermuteMaskedInt8x64
-       OpPermuteMaskedInt16x8
-       OpPermuteMaskedInt16x16
-       OpPermuteMaskedInt16x32
-       OpPermuteMaskedInt32x8
-       OpPermuteMaskedInt32x16
-       OpPermuteMaskedInt64x4
-       OpPermuteMaskedInt64x8
-       OpPermuteMaskedUint8x16
-       OpPermuteMaskedUint8x32
-       OpPermuteMaskedUint8x64
-       OpPermuteMaskedUint16x8
-       OpPermuteMaskedUint16x16
-       OpPermuteMaskedUint16x32
-       OpPermuteMaskedUint32x8
-       OpPermuteMaskedUint32x16
-       OpPermuteMaskedUint64x4
-       OpPermuteMaskedUint64x8
        OpPermuteUint8x16
        OpPermuteUint8x32
        OpPermuteUint8x64
@@ -5798,42 +5270,18 @@ const (
        OpReciprocalFloat64x2
        OpReciprocalFloat64x4
        OpReciprocalFloat64x8
-       OpReciprocalMaskedFloat32x4
-       OpReciprocalMaskedFloat32x8
-       OpReciprocalMaskedFloat32x16
-       OpReciprocalMaskedFloat64x2
-       OpReciprocalMaskedFloat64x4
-       OpReciprocalMaskedFloat64x8
        OpReciprocalSqrtFloat32x4
        OpReciprocalSqrtFloat32x8
        OpReciprocalSqrtFloat32x16
        OpReciprocalSqrtFloat64x2
        OpReciprocalSqrtFloat64x4
        OpReciprocalSqrtFloat64x8
-       OpReciprocalSqrtMaskedFloat32x4
-       OpReciprocalSqrtMaskedFloat32x8
-       OpReciprocalSqrtMaskedFloat32x16
-       OpReciprocalSqrtMaskedFloat64x2
-       OpReciprocalSqrtMaskedFloat64x4
-       OpReciprocalSqrtMaskedFloat64x8
        OpRotateLeftInt32x4
        OpRotateLeftInt32x8
        OpRotateLeftInt32x16
        OpRotateLeftInt64x2
        OpRotateLeftInt64x4
        OpRotateLeftInt64x8
-       OpRotateLeftMaskedInt32x4
-       OpRotateLeftMaskedInt32x8
-       OpRotateLeftMaskedInt32x16
-       OpRotateLeftMaskedInt64x2
-       OpRotateLeftMaskedInt64x4
-       OpRotateLeftMaskedInt64x8
-       OpRotateLeftMaskedUint32x4
-       OpRotateLeftMaskedUint32x8
-       OpRotateLeftMaskedUint32x16
-       OpRotateLeftMaskedUint64x2
-       OpRotateLeftMaskedUint64x4
-       OpRotateLeftMaskedUint64x8
        OpRotateLeftUint32x4
        OpRotateLeftUint32x8
        OpRotateLeftUint32x16
@@ -5846,18 +5294,6 @@ const (
        OpRotateRightInt64x2
        OpRotateRightInt64x4
        OpRotateRightInt64x8
-       OpRotateRightMaskedInt32x4
-       OpRotateRightMaskedInt32x8
-       OpRotateRightMaskedInt32x16
-       OpRotateRightMaskedInt64x2
-       OpRotateRightMaskedInt64x4
-       OpRotateRightMaskedInt64x8
-       OpRotateRightMaskedUint32x4
-       OpRotateRightMaskedUint32x8
-       OpRotateRightMaskedUint32x16
-       OpRotateRightMaskedUint64x2
-       OpRotateRightMaskedUint64x4
-       OpRotateRightMaskedUint64x8
        OpRotateRightUint32x4
        OpRotateRightUint32x8
        OpRotateRightUint32x16
@@ -5874,12 +5310,6 @@ const (
        OpScaleFloat64x2
        OpScaleFloat64x4
        OpScaleFloat64x8
-       OpScaleMaskedFloat32x4
-       OpScaleMaskedFloat32x8
-       OpScaleMaskedFloat32x16
-       OpScaleMaskedFloat64x2
-       OpScaleMaskedFloat64x4
-       OpScaleMaskedFloat64x8
        OpSetHiFloat32x8
        OpSetHiFloat32x16
        OpSetHiFloat64x4
@@ -5929,24 +5359,6 @@ const (
        OpShiftAllLeftInt64x2
        OpShiftAllLeftInt64x4
        OpShiftAllLeftInt64x8
-       OpShiftAllLeftMaskedInt16x8
-       OpShiftAllLeftMaskedInt16x16
-       OpShiftAllLeftMaskedInt16x32
-       OpShiftAllLeftMaskedInt32x4
-       OpShiftAllLeftMaskedInt32x8
-       OpShiftAllLeftMaskedInt32x16
-       OpShiftAllLeftMaskedInt64x2
-       OpShiftAllLeftMaskedInt64x4
-       OpShiftAllLeftMaskedInt64x8
-       OpShiftAllLeftMaskedUint16x8
-       OpShiftAllLeftMaskedUint16x16
-       OpShiftAllLeftMaskedUint16x32
-       OpShiftAllLeftMaskedUint32x4
-       OpShiftAllLeftMaskedUint32x8
-       OpShiftAllLeftMaskedUint32x16
-       OpShiftAllLeftMaskedUint64x2
-       OpShiftAllLeftMaskedUint64x4
-       OpShiftAllLeftMaskedUint64x8
        OpShiftAllLeftUint16x8
        OpShiftAllLeftUint16x16
        OpShiftAllLeftUint16x32
@@ -5965,24 +5377,6 @@ const (
        OpShiftAllRightInt64x2
        OpShiftAllRightInt64x4
        OpShiftAllRightInt64x8
-       OpShiftAllRightMaskedInt16x8
-       OpShiftAllRightMaskedInt16x16
-       OpShiftAllRightMaskedInt16x32
-       OpShiftAllRightMaskedInt32x4
-       OpShiftAllRightMaskedInt32x8
-       OpShiftAllRightMaskedInt32x16
-       OpShiftAllRightMaskedInt64x2
-       OpShiftAllRightMaskedInt64x4
-       OpShiftAllRightMaskedInt64x8
-       OpShiftAllRightMaskedUint16x8
-       OpShiftAllRightMaskedUint16x16
-       OpShiftAllRightMaskedUint16x32
-       OpShiftAllRightMaskedUint32x4
-       OpShiftAllRightMaskedUint32x8
-       OpShiftAllRightMaskedUint32x16
-       OpShiftAllRightMaskedUint64x2
-       OpShiftAllRightMaskedUint64x4
-       OpShiftAllRightMaskedUint64x8
        OpShiftAllRightUint16x8
        OpShiftAllRightUint16x16
        OpShiftAllRightUint16x32
@@ -6001,24 +5395,6 @@ const (
        OpShiftLeftConcatInt64x2
        OpShiftLeftConcatInt64x4
        OpShiftLeftConcatInt64x8
-       OpShiftLeftConcatMaskedInt16x8
-       OpShiftLeftConcatMaskedInt16x16
-       OpShiftLeftConcatMaskedInt16x32
-       OpShiftLeftConcatMaskedInt32x4
-       OpShiftLeftConcatMaskedInt32x8
-       OpShiftLeftConcatMaskedInt32x16
-       OpShiftLeftConcatMaskedInt64x2
-       OpShiftLeftConcatMaskedInt64x4
-       OpShiftLeftConcatMaskedInt64x8
-       OpShiftLeftConcatMaskedUint16x8
-       OpShiftLeftConcatMaskedUint16x16
-       OpShiftLeftConcatMaskedUint16x32
-       OpShiftLeftConcatMaskedUint32x4
-       OpShiftLeftConcatMaskedUint32x8
-       OpShiftLeftConcatMaskedUint32x16
-       OpShiftLeftConcatMaskedUint64x2
-       OpShiftLeftConcatMaskedUint64x4
-       OpShiftLeftConcatMaskedUint64x8
        OpShiftLeftConcatUint16x8
        OpShiftLeftConcatUint16x16
        OpShiftLeftConcatUint16x32
@@ -6037,24 +5413,6 @@ const (
        OpShiftLeftInt64x2
        OpShiftLeftInt64x4
        OpShiftLeftInt64x8
-       OpShiftLeftMaskedInt16x8
-       OpShiftLeftMaskedInt16x16
-       OpShiftLeftMaskedInt16x32
-       OpShiftLeftMaskedInt32x4
-       OpShiftLeftMaskedInt32x8
-       OpShiftLeftMaskedInt32x16
-       OpShiftLeftMaskedInt64x2
-       OpShiftLeftMaskedInt64x4
-       OpShiftLeftMaskedInt64x8
-       OpShiftLeftMaskedUint16x8
-       OpShiftLeftMaskedUint16x16
-       OpShiftLeftMaskedUint16x32
-       OpShiftLeftMaskedUint32x4
-       OpShiftLeftMaskedUint32x8
-       OpShiftLeftMaskedUint32x16
-       OpShiftLeftMaskedUint64x2
-       OpShiftLeftMaskedUint64x4
-       OpShiftLeftMaskedUint64x8
        OpShiftLeftUint16x8
        OpShiftLeftUint16x16
        OpShiftLeftUint16x32
@@ -6073,24 +5431,6 @@ const (
        OpShiftRightConcatInt64x2
        OpShiftRightConcatInt64x4
        OpShiftRightConcatInt64x8
-       OpShiftRightConcatMaskedInt16x8
-       OpShiftRightConcatMaskedInt16x16
-       OpShiftRightConcatMaskedInt16x32
-       OpShiftRightConcatMaskedInt32x4
-       OpShiftRightConcatMaskedInt32x8
-       OpShiftRightConcatMaskedInt32x16
-       OpShiftRightConcatMaskedInt64x2
-       OpShiftRightConcatMaskedInt64x4
-       OpShiftRightConcatMaskedInt64x8
-       OpShiftRightConcatMaskedUint16x8
-       OpShiftRightConcatMaskedUint16x16
-       OpShiftRightConcatMaskedUint16x32
-       OpShiftRightConcatMaskedUint32x4
-       OpShiftRightConcatMaskedUint32x8
-       OpShiftRightConcatMaskedUint32x16
-       OpShiftRightConcatMaskedUint64x2
-       OpShiftRightConcatMaskedUint64x4
-       OpShiftRightConcatMaskedUint64x8
        OpShiftRightConcatUint16x8
        OpShiftRightConcatUint16x16
        OpShiftRightConcatUint16x32
@@ -6109,24 +5449,6 @@ const (
        OpShiftRightInt64x2
        OpShiftRightInt64x4
        OpShiftRightInt64x8
-       OpShiftRightMaskedInt16x8
-       OpShiftRightMaskedInt16x16
-       OpShiftRightMaskedInt16x32
-       OpShiftRightMaskedInt32x4
-       OpShiftRightMaskedInt32x8
-       OpShiftRightMaskedInt32x16
-       OpShiftRightMaskedInt64x2
-       OpShiftRightMaskedInt64x4
-       OpShiftRightMaskedInt64x8
-       OpShiftRightMaskedUint16x8
-       OpShiftRightMaskedUint16x16
-       OpShiftRightMaskedUint16x32
-       OpShiftRightMaskedUint32x4
-       OpShiftRightMaskedUint32x8
-       OpShiftRightMaskedUint32x16
-       OpShiftRightMaskedUint64x2
-       OpShiftRightMaskedUint64x4
-       OpShiftRightMaskedUint64x8
        OpShiftRightUint16x8
        OpShiftRightUint16x16
        OpShiftRightUint16x32
@@ -6142,12 +5464,6 @@ const (
        OpSqrtFloat64x2
        OpSqrtFloat64x4
        OpSqrtFloat64x8
-       OpSqrtMaskedFloat32x4
-       OpSqrtMaskedFloat32x8
-       OpSqrtMaskedFloat32x16
-       OpSqrtMaskedFloat64x2
-       OpSqrtMaskedFloat64x4
-       OpSqrtMaskedFloat64x8
        OpSubFloat32x4
        OpSubFloat32x8
        OpSubFloat32x16
@@ -6166,36 +5482,6 @@ const (
        OpSubInt64x2
        OpSubInt64x4
        OpSubInt64x8
-       OpSubMaskedFloat32x4
-       OpSubMaskedFloat32x8
-       OpSubMaskedFloat32x16
-       OpSubMaskedFloat64x2
-       OpSubMaskedFloat64x4
-       OpSubMaskedFloat64x8
-       OpSubMaskedInt8x16
-       OpSubMaskedInt8x32
-       OpSubMaskedInt8x64
-       OpSubMaskedInt16x8
-       OpSubMaskedInt16x16
-       OpSubMaskedInt16x32
-       OpSubMaskedInt32x4
-       OpSubMaskedInt32x8
-       OpSubMaskedInt32x16
-       OpSubMaskedInt64x2
-       OpSubMaskedInt64x4
-       OpSubMaskedInt64x8
-       OpSubMaskedUint8x16
-       OpSubMaskedUint8x32
-       OpSubMaskedUint8x64
-       OpSubMaskedUint16x8
-       OpSubMaskedUint16x16
-       OpSubMaskedUint16x32
-       OpSubMaskedUint32x4
-       OpSubMaskedUint32x8
-       OpSubMaskedUint32x16
-       OpSubMaskedUint64x2
-       OpSubMaskedUint64x4
-       OpSubMaskedUint64x8
        OpSubPairsFloat32x4
        OpSubPairsFloat32x8
        OpSubPairsFloat64x2
@@ -6216,18 +5502,6 @@ const (
        OpSubSaturatedInt16x8
        OpSubSaturatedInt16x16
        OpSubSaturatedInt16x32
-       OpSubSaturatedMaskedInt8x16
-       OpSubSaturatedMaskedInt8x32
-       OpSubSaturatedMaskedInt8x64
-       OpSubSaturatedMaskedInt16x8
-       OpSubSaturatedMaskedInt16x16
-       OpSubSaturatedMaskedInt16x32
-       OpSubSaturatedMaskedUint8x16
-       OpSubSaturatedMaskedUint8x32
-       OpSubSaturatedMaskedUint8x64
-       OpSubSaturatedMaskedUint16x8
-       OpSubSaturatedMaskedUint16x16
-       OpSubSaturatedMaskedUint16x32
        OpSubSaturatedUint8x16
        OpSubSaturatedUint8x32
        OpSubSaturatedUint8x64
@@ -6262,18 +5536,6 @@ const (
        OpXorInt64x2
        OpXorInt64x4
        OpXorInt64x8
-       OpXorMaskedInt32x4
-       OpXorMaskedInt32x8
-       OpXorMaskedInt32x16
-       OpXorMaskedInt64x2
-       OpXorMaskedInt64x4
-       OpXorMaskedInt64x8
-       OpXorMaskedUint32x4
-       OpXorMaskedUint32x8
-       OpXorMaskedUint32x16
-       OpXorMaskedUint64x2
-       OpXorMaskedUint64x4
-       OpXorMaskedUint64x8
        OpXorUint8x16
        OpXorUint8x32
        OpXorUint8x64
@@ -6298,57 +5560,27 @@ const (
        OpCeilScaledFloat64x2
        OpCeilScaledFloat64x4
        OpCeilScaledFloat64x8
-       OpCeilScaledMaskedFloat32x4
-       OpCeilScaledMaskedFloat32x8
-       OpCeilScaledMaskedFloat32x16
-       OpCeilScaledMaskedFloat64x2
-       OpCeilScaledMaskedFloat64x4
-       OpCeilScaledMaskedFloat64x8
        OpCeilScaledResidueFloat32x4
        OpCeilScaledResidueFloat32x8
        OpCeilScaledResidueFloat32x16
        OpCeilScaledResidueFloat64x2
        OpCeilScaledResidueFloat64x4
        OpCeilScaledResidueFloat64x8
-       OpCeilScaledResidueMaskedFloat32x4
-       OpCeilScaledResidueMaskedFloat32x8
-       OpCeilScaledResidueMaskedFloat32x16
-       OpCeilScaledResidueMaskedFloat64x2
-       OpCeilScaledResidueMaskedFloat64x4
-       OpCeilScaledResidueMaskedFloat64x8
        OpFloorScaledFloat32x4
        OpFloorScaledFloat32x8
        OpFloorScaledFloat32x16
        OpFloorScaledFloat64x2
        OpFloorScaledFloat64x4
        OpFloorScaledFloat64x8
-       OpFloorScaledMaskedFloat32x4
-       OpFloorScaledMaskedFloat32x8
-       OpFloorScaledMaskedFloat32x16
-       OpFloorScaledMaskedFloat64x2
-       OpFloorScaledMaskedFloat64x4
-       OpFloorScaledMaskedFloat64x8
        OpFloorScaledResidueFloat32x4
        OpFloorScaledResidueFloat32x8
        OpFloorScaledResidueFloat32x16
        OpFloorScaledResidueFloat64x2
        OpFloorScaledResidueFloat64x4
        OpFloorScaledResidueFloat64x8
-       OpFloorScaledResidueMaskedFloat32x4
-       OpFloorScaledResidueMaskedFloat32x8
-       OpFloorScaledResidueMaskedFloat32x16
-       OpFloorScaledResidueMaskedFloat64x2
-       OpFloorScaledResidueMaskedFloat64x4
-       OpFloorScaledResidueMaskedFloat64x8
-       OpGaloisFieldAffineTransformInverseMaskedUint8x16
-       OpGaloisFieldAffineTransformInverseMaskedUint8x32
-       OpGaloisFieldAffineTransformInverseMaskedUint8x64
        OpGaloisFieldAffineTransformInverseUint8x16
        OpGaloisFieldAffineTransformInverseUint8x32
        OpGaloisFieldAffineTransformInverseUint8x64
-       OpGaloisFieldAffineTransformMaskedUint8x16
-       OpGaloisFieldAffineTransformMaskedUint8x32
-       OpGaloisFieldAffineTransformMaskedUint8x64
        OpGaloisFieldAffineTransformUint8x16
        OpGaloisFieldAffineTransformUint8x32
        OpGaloisFieldAffineTransformUint8x64
@@ -6368,18 +5600,6 @@ const (
        OpRotateAllLeftInt64x2
        OpRotateAllLeftInt64x4
        OpRotateAllLeftInt64x8
-       OpRotateAllLeftMaskedInt32x4
-       OpRotateAllLeftMaskedInt32x8
-       OpRotateAllLeftMaskedInt32x16
-       OpRotateAllLeftMaskedInt64x2
-       OpRotateAllLeftMaskedInt64x4
-       OpRotateAllLeftMaskedInt64x8
-       OpRotateAllLeftMaskedUint32x4
-       OpRotateAllLeftMaskedUint32x8
-       OpRotateAllLeftMaskedUint32x16
-       OpRotateAllLeftMaskedUint64x2
-       OpRotateAllLeftMaskedUint64x4
-       OpRotateAllLeftMaskedUint64x8
        OpRotateAllLeftUint32x4
        OpRotateAllLeftUint32x8
        OpRotateAllLeftUint32x16
@@ -6392,18 +5612,6 @@ const (
        OpRotateAllRightInt64x2
        OpRotateAllRightInt64x4
        OpRotateAllRightInt64x8
-       OpRotateAllRightMaskedInt32x4
-       OpRotateAllRightMaskedInt32x8
-       OpRotateAllRightMaskedInt32x16
-       OpRotateAllRightMaskedInt64x2
-       OpRotateAllRightMaskedInt64x4
-       OpRotateAllRightMaskedInt64x8
-       OpRotateAllRightMaskedUint32x4
-       OpRotateAllRightMaskedUint32x8
-       OpRotateAllRightMaskedUint32x16
-       OpRotateAllRightMaskedUint64x2
-       OpRotateAllRightMaskedUint64x4
-       OpRotateAllRightMaskedUint64x8
        OpRotateAllRightUint32x4
        OpRotateAllRightUint32x8
        OpRotateAllRightUint32x16
@@ -6416,24 +5624,12 @@ const (
        OpRoundToEvenScaledFloat64x2
        OpRoundToEvenScaledFloat64x4
        OpRoundToEvenScaledFloat64x8
-       OpRoundToEvenScaledMaskedFloat32x4
-       OpRoundToEvenScaledMaskedFloat32x8
-       OpRoundToEvenScaledMaskedFloat32x16
-       OpRoundToEvenScaledMaskedFloat64x2
-       OpRoundToEvenScaledMaskedFloat64x4
-       OpRoundToEvenScaledMaskedFloat64x8
        OpRoundToEvenScaledResidueFloat32x4
        OpRoundToEvenScaledResidueFloat32x8
        OpRoundToEvenScaledResidueFloat32x16
        OpRoundToEvenScaledResidueFloat64x2
        OpRoundToEvenScaledResidueFloat64x4
        OpRoundToEvenScaledResidueFloat64x8
-       OpRoundToEvenScaledResidueMaskedFloat32x4
-       OpRoundToEvenScaledResidueMaskedFloat32x8
-       OpRoundToEvenScaledResidueMaskedFloat32x16
-       OpRoundToEvenScaledResidueMaskedFloat64x2
-       OpRoundToEvenScaledResidueMaskedFloat64x4
-       OpRoundToEvenScaledResidueMaskedFloat64x8
        OpSetElemFloat32x4
        OpSetElemFloat64x2
        OpSetElemInt8x16
@@ -6453,24 +5649,6 @@ const (
        OpShiftAllLeftConcatInt64x2
        OpShiftAllLeftConcatInt64x4
        OpShiftAllLeftConcatInt64x8
-       OpShiftAllLeftConcatMaskedInt16x8
-       OpShiftAllLeftConcatMaskedInt16x16
-       OpShiftAllLeftConcatMaskedInt16x32
-       OpShiftAllLeftConcatMaskedInt32x4
-       OpShiftAllLeftConcatMaskedInt32x8
-       OpShiftAllLeftConcatMaskedInt32x16
-       OpShiftAllLeftConcatMaskedInt64x2
-       OpShiftAllLeftConcatMaskedInt64x4
-       OpShiftAllLeftConcatMaskedInt64x8
-       OpShiftAllLeftConcatMaskedUint16x8
-       OpShiftAllLeftConcatMaskedUint16x16
-       OpShiftAllLeftConcatMaskedUint16x32
-       OpShiftAllLeftConcatMaskedUint32x4
-       OpShiftAllLeftConcatMaskedUint32x8
-       OpShiftAllLeftConcatMaskedUint32x16
-       OpShiftAllLeftConcatMaskedUint64x2
-       OpShiftAllLeftConcatMaskedUint64x4
-       OpShiftAllLeftConcatMaskedUint64x8
        OpShiftAllLeftConcatUint16x8
        OpShiftAllLeftConcatUint16x16
        OpShiftAllLeftConcatUint16x32
@@ -6489,24 +5667,6 @@ const (
        OpShiftAllRightConcatInt64x2
        OpShiftAllRightConcatInt64x4
        OpShiftAllRightConcatInt64x8
-       OpShiftAllRightConcatMaskedInt16x8
-       OpShiftAllRightConcatMaskedInt16x16
-       OpShiftAllRightConcatMaskedInt16x32
-       OpShiftAllRightConcatMaskedInt32x4
-       OpShiftAllRightConcatMaskedInt32x8
-       OpShiftAllRightConcatMaskedInt32x16
-       OpShiftAllRightConcatMaskedInt64x2
-       OpShiftAllRightConcatMaskedInt64x4
-       OpShiftAllRightConcatMaskedInt64x8
-       OpShiftAllRightConcatMaskedUint16x8
-       OpShiftAllRightConcatMaskedUint16x16
-       OpShiftAllRightConcatMaskedUint16x32
-       OpShiftAllRightConcatMaskedUint32x4
-       OpShiftAllRightConcatMaskedUint32x8
-       OpShiftAllRightConcatMaskedUint32x16
-       OpShiftAllRightConcatMaskedUint64x2
-       OpShiftAllRightConcatMaskedUint64x4
-       OpShiftAllRightConcatMaskedUint64x8
        OpShiftAllRightConcatUint16x8
        OpShiftAllRightConcatUint16x16
        OpShiftAllRightConcatUint16x32
@@ -6522,24 +5682,12 @@ const (
        OpTruncScaledFloat64x2
        OpTruncScaledFloat64x4
        OpTruncScaledFloat64x8
-       OpTruncScaledMaskedFloat32x4
-       OpTruncScaledMaskedFloat32x8
-       OpTruncScaledMaskedFloat32x16
-       OpTruncScaledMaskedFloat64x2
-       OpTruncScaledMaskedFloat64x4
-       OpTruncScaledMaskedFloat64x8
        OpTruncScaledResidueFloat32x4
        OpTruncScaledResidueFloat32x8
        OpTruncScaledResidueFloat32x16
        OpTruncScaledResidueFloat64x2
        OpTruncScaledResidueFloat64x4
        OpTruncScaledResidueFloat64x8
-       OpTruncScaledResidueMaskedFloat32x4
-       OpTruncScaledResidueMaskedFloat32x8
-       OpTruncScaledResidueMaskedFloat32x16
-       OpTruncScaledResidueMaskedFloat64x2
-       OpTruncScaledResidueMaskedFloat64x4
-       OpTruncScaledResidueMaskedFloat64x8
 )
 
 var opcodeTable = [...]opInfo{
@@ -63838,66 +62986,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "AbsMaskedInt8x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt16x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "AbsMaskedInt64x8",
-               argLen:  2,
-               generic: true,
-       },
        {
                name:    "AddDotProdPairsSaturatedInt32x4",
                argLen:  3,
@@ -63913,21 +63001,6 @@ var opcodeTable = [...]opInfo{
                argLen:  3,
                generic: true,
        },
-       {
-               name:    "AddDotProdPairsSaturatedMaskedInt32x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "AddDotProdPairsSaturatedMaskedInt32x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "AddDotProdPairsSaturatedMaskedInt32x16",
-               argLen:  4,
-               generic: true,
-       },
        {
                name:    "AddDotProdQuadrupleInt32x4",
                argLen:  3,
@@ -63943,21 +63016,6 @@ var opcodeTable = [...]opInfo{
                argLen:  3,
                generic: true,
        },
-       {
-               name:    "AddDotProdQuadrupleMaskedInt32x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "AddDotProdQuadrupleMaskedInt32x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "AddDotProdQuadrupleMaskedInt32x16",
-               argLen:  4,
-               generic: true,
-       },
        {
                name:    "AddDotProdQuadrupleSaturatedInt32x4",
                argLen:  3,
@@ -63973,21 +63031,6 @@ var opcodeTable = [...]opInfo{
                argLen:  3,
                generic: true,
        },
-       {
-               name:    "AddDotProdQuadrupleSaturatedMaskedInt32x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "AddDotProdQuadrupleSaturatedMaskedInt32x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "AddDotProdQuadrupleSaturatedMaskedInt32x16",
-               argLen:  4,
-               generic: true,
-       },
        {
                name:        "AddFloat32x4",
                argLen:      2,
@@ -64096,186 +63139,6 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
-       {
-               name:        "AddMaskedFloat32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedFloat32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedFloat32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedFloat64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedFloat64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedFloat64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedInt64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddMaskedUint64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
        {
                name:    "AddPairsFloat32x4",
                argLen:  2,
@@ -64382,78 +63245,6 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
-       {
-               name:        "AddSaturatedMaskedInt8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedInt8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedInt8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedInt16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedInt16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedInt16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedUint8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedUint8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedUint8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedUint16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedUint16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AddSaturatedMaskedUint16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
        {
                name:        "AddSaturatedUint8x16",
                argLen:      2,
@@ -64654,78 +63445,6 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
-       {
-               name:        "AndMaskedInt32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedInt32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedInt32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedInt64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedInt64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedInt64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedUint32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedUint32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedUint32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedUint64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedUint64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AndMaskedUint64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
        {
                name:    "AndNotInt8x16",
                argLen:  2,
@@ -64786,66 +63505,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "AndNotMaskedInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedInt64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedUint32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedUint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedUint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedUint64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedUint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AndNotMaskedUint64x8",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "AndNotUint8x16",
                argLen:  2,
@@ -64978,42 +63637,6 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
-       {
-               name:        "AverageMaskedUint8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AverageMaskedUint8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AverageMaskedUint8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AverageMaskedUint16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AverageMaskedUint16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "AverageMaskedUint16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
        {
                name:        "AverageUint8x16",
                argLen:      2,
@@ -65081,308 +63704,158 @@ var opcodeTable = [...]opInfo{
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedFloat32x4",
-               argLen:  2,
+               name:    "Broadcast128Uint8x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedFloat64x2",
-               argLen:  2,
+               name:    "Broadcast128Uint16x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedInt8x16",
-               argLen:  2,
+               name:    "Broadcast128Uint32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedInt16x8",
-               argLen:  2,
+               name:    "Broadcast128Uint64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedInt32x4",
-               argLen:  2,
+               name:    "Broadcast256Float32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedInt64x2",
-               argLen:  2,
+               name:    "Broadcast256Float64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedUint8x16",
-               argLen:  2,
+               name:    "Broadcast256Int8x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedUint16x8",
-               argLen:  2,
+               name:    "Broadcast256Int16x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedUint32x4",
-               argLen:  2,
+               name:    "Broadcast256Int32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128MaskedUint64x2",
-               argLen:  2,
+               name:    "Broadcast256Int64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128Uint8x16",
+               name:    "Broadcast256Uint8x16",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128Uint16x8",
+               name:    "Broadcast256Uint16x8",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128Uint32x4",
+               name:    "Broadcast256Uint32x4",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128Uint64x2",
+               name:    "Broadcast256Uint64x2",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Float32x4",
+               name:    "Broadcast512Float32x4",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Float64x2",
+               name:    "Broadcast512Float64x2",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Int8x16",
+               name:    "Broadcast512Int8x16",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Int16x8",
+               name:    "Broadcast512Int16x8",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Int32x4",
+               name:    "Broadcast512Int32x4",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Int64x2",
+               name:    "Broadcast512Int64x2",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedFloat32x4",
-               argLen:  2,
+               name:    "Broadcast512Uint8x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedFloat64x2",
-               argLen:  2,
+               name:    "Broadcast512Uint16x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedInt8x16",
-               argLen:  2,
+               name:    "Broadcast512Uint32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedInt16x8",
-               argLen:  2,
+               name:    "Broadcast512Uint64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedInt32x4",
-               argLen:  2,
+               name:    "CeilFloat32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedInt64x2",
-               argLen:  2,
+               name:    "CeilFloat32x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedUint8x16",
-               argLen:  2,
+               name:    "CeilFloat64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedUint16x8",
-               argLen:  2,
+               name:    "CeilFloat64x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedUint32x4",
+               name:    "CompressFloat32x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "Broadcast256MaskedUint64x2",
+               name:    "CompressFloat32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "Broadcast256Uint8x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast256Uint16x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast256Uint32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast256Uint64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Float32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Float64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Int8x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Int16x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Int32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Int64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedFloat32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedFloat64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedInt8x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedInt16x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedInt32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedInt64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedUint8x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedUint16x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedUint32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512MaskedUint64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Uint8x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Uint16x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Uint32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Broadcast512Uint64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilFloat32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilFloat32x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilFloat64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilFloat64x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CompressFloat32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CompressFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CompressFloat32x16",
-               argLen:  2,
+               name:    "CompressFloat32x16",
+               argLen:  2,
                generic: true,
        },
        {
@@ -65535,21 +64008,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "ConvertToInt32MaskedFloat32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ConvertToInt32MaskedFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ConvertToInt32MaskedFloat32x16",
-               argLen:  2,
-               generic: true,
-       },
        {
                name:    "ConvertToUint32Float32x4",
                argLen:  1,
@@ -65565,21 +64023,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "ConvertToUint32MaskedFloat32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ConvertToUint32MaskedFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ConvertToUint32MaskedFloat32x16",
-               argLen:  2,
-               generic: true,
-       },
        {
                name:    "CopySignInt8x16",
                argLen:  2,
@@ -65640,36 +64083,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "DivMaskedFloat32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DivMaskedFloat32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DivMaskedFloat32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DivMaskedFloat64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DivMaskedFloat64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DivMaskedFloat64x8",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "DotProdPairsInt16x8",
                argLen:  2,
@@ -65685,36 +64098,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "DotProdPairsMaskedInt16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DotProdPairsMaskedInt16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DotProdPairsMaskedInt16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DotProdPairsSaturatedMaskedUint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DotProdPairsSaturatedMaskedUint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "DotProdPairsSaturatedMaskedUint8x64",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "DotProdPairsSaturatedUint8x16",
                argLen:  2,
@@ -65838,186 +64221,6 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
-       {
-               name:        "EqualMaskedFloat32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedFloat32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedFloat32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedFloat64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedFloat64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedFloat64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedInt64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "EqualMaskedUint64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
        {
                name:        "EqualUint8x16",
                argLen:      2,
@@ -66260,21 +64463,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "GaloisFieldMulMaskedUint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GaloisFieldMulMaskedUint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GaloisFieldMulMaskedUint8x64",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "GaloisFieldMulUint8x16",
                argLen:  2,
@@ -66540,156 +64728,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "GreaterEqualMaskedFloat32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedFloat32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedFloat32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedFloat64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedFloat64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedFloat64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterEqualMaskedUint64x8",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "GreaterEqualUint8x64",
                argLen:  2,
@@ -66800,156 +64838,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "GreaterMaskedFloat32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedFloat32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedFloat32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedFloat64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedFloat64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedFloat64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GreaterMaskedUint64x8",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "GreaterUint8x64",
                argLen:  2,
@@ -67006,42 +64894,6 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
-       {
-               name:        "IsNanMaskedFloat32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "IsNanMaskedFloat32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "IsNanMaskedFloat32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "IsNanMaskedFloat64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "IsNanMaskedFloat64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "IsNanMaskedFloat64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
        {
                name:    "LessEqualFloat32x4",
                argLen:  2,
@@ -67092,156 +64944,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "LessEqualMaskedFloat32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedFloat32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedFloat32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedFloat64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedFloat64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedFloat64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessEqualMaskedUint64x8",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "LessEqualUint8x64",
                argLen:  2,
@@ -67312,156 +65014,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "LessMaskedFloat32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedFloat32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedFloat32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedFloat64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedFloat64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedFloat64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "LessMaskedUint64x8",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "LessUint8x64",
                argLen:  2,
@@ -67591,4459 +65143,2257 @@ var opcodeTable = [...]opInfo{
                generic:     true,
        },
        {
-               name:        "MaxMaskedFloat32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MaxMaskedFloat32x8",
-               argLen:      3,
+               name:        "MaxUint8x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedFloat32x16",
-               argLen:      3,
+               name:        "MaxUint8x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedFloat64x2",
-               argLen:      3,
+               name:        "MaxUint8x64",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedFloat64x4",
-               argLen:      3,
+               name:        "MaxUint16x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedFloat64x8",
-               argLen:      3,
+               name:        "MaxUint16x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt8x16",
-               argLen:      3,
+               name:        "MaxUint16x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt8x32",
-               argLen:      3,
+               name:        "MaxUint32x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt8x64",
-               argLen:      3,
+               name:        "MaxUint32x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt16x8",
-               argLen:      3,
+               name:        "MaxUint32x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt16x16",
-               argLen:      3,
+               name:        "MaxUint64x2",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt16x32",
-               argLen:      3,
+               name:        "MaxUint64x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt32x4",
-               argLen:      3,
+               name:        "MaxUint64x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt32x8",
-               argLen:      3,
+               name:        "MinFloat32x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt32x16",
-               argLen:      3,
+               name:        "MinFloat32x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt64x2",
-               argLen:      3,
+               name:        "MinFloat32x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt64x4",
-               argLen:      3,
+               name:        "MinFloat64x2",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedInt64x8",
-               argLen:      3,
+               name:        "MinFloat64x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint8x16",
-               argLen:      3,
+               name:        "MinFloat64x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint8x32",
-               argLen:      3,
+               name:        "MinInt8x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint8x64",
-               argLen:      3,
+               name:        "MinInt8x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint16x8",
-               argLen:      3,
+               name:        "MinInt8x64",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint16x16",
-               argLen:      3,
+               name:        "MinInt16x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint16x32",
-               argLen:      3,
+               name:        "MinInt16x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint32x4",
-               argLen:      3,
+               name:        "MinInt16x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint32x8",
-               argLen:      3,
+               name:        "MinInt32x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint32x16",
-               argLen:      3,
+               name:        "MinInt32x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint64x2",
-               argLen:      3,
+               name:        "MinInt32x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint64x4",
-               argLen:      3,
+               name:        "MinInt64x2",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxMaskedUint64x8",
-               argLen:      3,
+               name:        "MinInt64x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint8x16",
+               name:        "MinInt64x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint8x32",
+               name:        "MinUint8x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint8x64",
+               name:        "MinUint8x32",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint16x8",
+               name:        "MinUint8x64",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint16x16",
+               name:        "MinUint16x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint16x32",
+               name:        "MinUint16x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint32x4",
+               name:        "MinUint16x32",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint32x8",
+               name:        "MinUint32x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint32x16",
+               name:        "MinUint32x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint64x2",
+               name:        "MinUint32x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint64x4",
+               name:        "MinUint64x2",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MaxUint64x8",
+               name:        "MinUint64x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinFloat32x4",
+               name:        "MinUint64x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinFloat32x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "MulAddFloat32x4",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinFloat32x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "MulAddFloat32x8",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinFloat64x2",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "MulAddFloat32x16",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinFloat64x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "MulAddFloat64x2",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinFloat64x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "MulAddFloat64x4",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinInt8x16",
+               name:    "MulAddFloat64x8",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "MulAddSubFloat32x4",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "MulAddSubFloat32x8",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "MulAddSubFloat32x16",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "MulAddSubFloat64x2",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "MulAddSubFloat64x4",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "MulAddSubFloat64x8",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:        "MulEvenWidenInt32x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt8x32",
+               name:        "MulEvenWidenInt32x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt8x64",
+               name:        "MulEvenWidenUint32x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt16x8",
+               name:        "MulEvenWidenUint32x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt16x16",
+               name:        "MulFloat32x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt16x32",
+               name:        "MulFloat32x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt32x4",
+               name:        "MulFloat32x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt32x8",
+               name:        "MulFloat64x2",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt32x16",
+               name:        "MulFloat64x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt64x2",
+               name:        "MulFloat64x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt64x4",
+               name:        "MulHighInt16x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinInt64x8",
+               name:        "MulHighInt16x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedFloat32x4",
-               argLen:      3,
+               name:        "MulHighInt16x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedFloat32x8",
-               argLen:      3,
+               name:        "MulHighUint16x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedFloat32x16",
-               argLen:      3,
+               name:        "MulHighUint16x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedFloat64x2",
-               argLen:      3,
+               name:        "MulHighUint16x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedFloat64x4",
-               argLen:      3,
+               name:        "MulInt16x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedFloat64x8",
-               argLen:      3,
+               name:        "MulInt16x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedInt8x16",
-               argLen:      3,
+               name:        "MulInt16x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedInt8x32",
-               argLen:      3,
+               name:        "MulInt32x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedInt8x64",
-               argLen:      3,
+               name:        "MulInt32x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedInt16x8",
-               argLen:      3,
+               name:        "MulInt32x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedInt16x16",
-               argLen:      3,
+               name:        "MulInt64x2",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedInt16x32",
-               argLen:      3,
+               name:        "MulInt64x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedInt32x4",
-               argLen:      3,
+               name:        "MulInt64x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedInt32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
+               name:    "MulSubAddFloat32x4",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinMaskedInt32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
+               name:    "MulSubAddFloat32x8",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinMaskedInt64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
+               name:    "MulSubAddFloat32x16",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinMaskedInt64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
+               name:    "MulSubAddFloat64x2",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinMaskedInt64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
+               name:    "MulSubAddFloat64x4",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinMaskedUint8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
+               name:    "MulSubAddFloat64x8",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MinMaskedUint8x32",
-               argLen:      3,
+               name:        "MulUint16x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint8x64",
-               argLen:      3,
+               name:        "MulUint16x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint16x8",
-               argLen:      3,
+               name:        "MulUint16x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint16x16",
-               argLen:      3,
+               name:        "MulUint32x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint16x32",
-               argLen:      3,
+               name:        "MulUint32x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint32x4",
-               argLen:      3,
+               name:        "MulUint32x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint32x8",
-               argLen:      3,
+               name:        "MulUint64x2",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint32x16",
-               argLen:      3,
+               name:        "MulUint64x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint64x2",
-               argLen:      3,
+               name:        "MulUint64x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint64x4",
-               argLen:      3,
+               name:        "NotEqualFloat32x4",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinMaskedUint64x8",
-               argLen:      3,
+               name:        "NotEqualFloat32x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint8x16",
+               name:        "NotEqualFloat32x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint8x32",
+               name:        "NotEqualFloat64x2",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint8x64",
+               name:        "NotEqualFloat64x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint16x8",
+               name:        "NotEqualFloat64x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint16x16",
+               name:        "NotEqualInt8x64",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint16x32",
+               name:        "NotEqualInt16x32",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint32x4",
+               name:        "NotEqualInt32x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint32x8",
+               name:        "NotEqualInt64x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint32x16",
+               name:        "NotEqualUint8x64",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint64x2",
+               name:        "NotEqualUint16x32",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint64x4",
+               name:        "NotEqualUint32x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MinUint64x8",
+               name:        "NotEqualUint64x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:    "MulAddFloat32x4",
-               argLen:  3,
+               name:    "OnesCountInt8x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddFloat32x8",
-               argLen:  3,
+               name:    "OnesCountInt8x32",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddFloat32x16",
-               argLen:  3,
+               name:    "OnesCountInt8x64",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddFloat64x2",
-               argLen:  3,
+               name:    "OnesCountInt16x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddFloat64x4",
-               argLen:  3,
+               name:    "OnesCountInt16x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddFloat64x8",
-               argLen:  3,
+               name:    "OnesCountInt16x32",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddMaskedFloat32x4",
-               argLen:  4,
+               name:    "OnesCountInt32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddMaskedFloat32x8",
-               argLen:  4,
+               name:    "OnesCountInt32x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddMaskedFloat32x16",
-               argLen:  4,
+               name:    "OnesCountInt32x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddMaskedFloat64x2",
-               argLen:  4,
+               name:    "OnesCountInt64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddMaskedFloat64x4",
-               argLen:  4,
+               name:    "OnesCountInt64x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddMaskedFloat64x8",
-               argLen:  4,
+               name:    "OnesCountInt64x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubFloat32x4",
-               argLen:  3,
+               name:    "OnesCountUint8x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubFloat32x8",
-               argLen:  3,
+               name:    "OnesCountUint8x32",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubFloat32x16",
-               argLen:  3,
+               name:    "OnesCountUint8x64",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubFloat64x2",
-               argLen:  3,
+               name:    "OnesCountUint16x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubFloat64x4",
-               argLen:  3,
+               name:    "OnesCountUint16x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubFloat64x8",
-               argLen:  3,
+               name:    "OnesCountUint16x32",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubMaskedFloat32x4",
-               argLen:  4,
+               name:    "OnesCountUint32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubMaskedFloat32x8",
-               argLen:  4,
+               name:    "OnesCountUint32x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubMaskedFloat32x16",
-               argLen:  4,
+               name:    "OnesCountUint32x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubMaskedFloat64x2",
-               argLen:  4,
+               name:    "OnesCountUint64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubMaskedFloat64x4",
-               argLen:  4,
+               name:    "OnesCountUint64x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "MulAddSubMaskedFloat64x8",
-               argLen:  4,
+               name:    "OnesCountUint64x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:        "MulEvenWidenInt32x4",
+               name:        "OrInt8x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulEvenWidenInt32x8",
+               name:        "OrInt8x32",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulEvenWidenUint32x4",
+               name:        "OrInt8x64",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulEvenWidenUint32x8",
+               name:        "OrInt16x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulFloat32x4",
+               name:        "OrInt16x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulFloat32x8",
+               name:        "OrInt16x32",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulFloat32x16",
+               name:        "OrInt32x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulFloat64x2",
+               name:        "OrInt32x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulFloat64x4",
+               name:        "OrInt32x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulFloat64x8",
+               name:        "OrInt64x2",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighInt16x8",
+               name:        "OrInt64x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighInt16x16",
+               name:        "OrInt64x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighInt16x32",
+               name:        "OrUint8x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighMaskedInt16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulHighMaskedInt16x16",
-               argLen:      3,
+               name:        "OrUint8x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighMaskedInt16x32",
-               argLen:      3,
+               name:        "OrUint8x64",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighMaskedUint16x8",
-               argLen:      3,
+               name:        "OrUint16x8",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighMaskedUint16x16",
-               argLen:      3,
+               name:        "OrUint16x16",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighMaskedUint16x32",
-               argLen:      3,
+               name:        "OrUint16x32",
+               argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighUint16x8",
+               name:        "OrUint32x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighUint16x16",
+               name:        "OrUint32x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulHighUint16x32",
+               name:        "OrUint32x16",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulInt16x8",
+               name:        "OrUint64x2",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulInt16x16",
+               name:        "OrUint64x4",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulInt16x32",
+               name:        "OrUint64x8",
                argLen:      2,
                commutative: true,
                generic:     true,
        },
        {
-               name:        "MulInt32x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "Permute2Float32x4",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MulInt32x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "Permute2Float32x8",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MulInt32x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "Permute2Float32x16",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MulInt64x2",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
+               name:    "Permute2Float64x2",
+               argLen:  3,
+               generic: true,
        },
        {
-               name:        "MulInt64x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulInt64x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedFloat32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedFloat32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedFloat32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedFloat64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedFloat64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedFloat64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedInt64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulMaskedUint64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:    "MulSubAddFloat32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddFloat32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddFloat32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddFloat64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddFloat64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddFloat64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddMaskedFloat32x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddMaskedFloat32x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddMaskedFloat32x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddMaskedFloat64x2",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddMaskedFloat64x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "MulSubAddMaskedFloat64x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:        "MulUint16x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulUint16x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulUint16x32",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulUint32x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulUint32x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulUint32x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulUint64x2",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulUint64x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "MulUint64x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualFloat32x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualFloat32x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualFloat32x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualFloat64x2",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualFloat64x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualFloat64x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualInt8x64",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualInt16x32",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualInt32x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualInt64x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedFloat32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedFloat32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedFloat32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedFloat64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedFloat64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedFloat64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedInt64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint8x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint8x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint8x64",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint16x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint16x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint16x32",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualMaskedUint64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualUint8x64",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualUint16x32",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualUint32x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "NotEqualUint64x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:    "OnesCountInt8x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt8x32",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt8x64",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt16x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt16x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt16x32",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt32x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt32x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt64x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountInt64x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt8x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt16x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedInt64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint8x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint16x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountMaskedUint64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint8x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint8x32",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint8x64",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint16x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint16x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint16x32",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint32x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint32x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint64x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "OnesCountUint64x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:        "OrInt8x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt8x32",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt8x64",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt16x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt16x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt16x32",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt32x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt32x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt32x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt64x2",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt64x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrInt64x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedInt32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedInt32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedInt32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedInt64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedInt64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedInt64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedUint32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedUint32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedUint32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedUint64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedUint64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrMaskedUint64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint8x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint8x32",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint8x64",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint16x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint16x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint16x32",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint32x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint32x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint32x16",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint64x2",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint64x4",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "OrUint64x8",
-               argLen:      2,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:    "Permute2Float32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Float32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Float32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Float64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Float64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Float64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Int64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedFloat32x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedFloat32x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedFloat32x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedFloat64x2",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedFloat64x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedFloat64x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt8x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt8x32",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt8x64",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt16x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt16x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt16x32",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt32x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt32x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt32x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt64x2",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt64x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedInt64x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint8x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint8x32",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint8x64",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint16x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint16x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint16x32",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint32x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint32x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint32x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint64x2",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint64x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2MaskedUint64x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "Permute2Uint64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteFloat32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteFloat64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteFloat64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt8x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt16x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteInt64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedFloat32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedFloat32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedFloat64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedFloat64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteMaskedUint64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint8x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint16x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "PermuteUint64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalFloat32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalFloat32x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalFloat32x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalFloat64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalFloat64x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalFloat64x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalMaskedFloat32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalMaskedFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalMaskedFloat32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalMaskedFloat64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalMaskedFloat64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalMaskedFloat64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtFloat32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtFloat32x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtFloat32x16",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtFloat64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtFloat64x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtFloat64x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtMaskedFloat32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtMaskedFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtMaskedFloat32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtMaskedFloat64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtMaskedFloat64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ReciprocalSqrtMaskedFloat64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftInt32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftInt32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftInt32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftInt64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftInt64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftInt64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedInt64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedUint32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedUint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedUint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedUint64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedUint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftMaskedUint64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftUint32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftUint32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftUint32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftUint64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftUint64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateLeftUint64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightInt32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightInt32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightInt32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightInt64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightInt64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightInt64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedInt64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedUint32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedUint32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedUint32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedUint64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedUint64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightMaskedUint64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "RotateRightUint32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightUint32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightUint32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightUint64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightUint64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateRightUint64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenFloat32x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenFloat32x8",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenFloat64x2",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenFloat64x4",
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "ScaleFloat32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ScaleFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ScaleFloat32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ScaleFloat64x2",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ScaleFloat64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ScaleFloat64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ScaleMaskedFloat32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ScaleMaskedFloat32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ScaleMaskedFloat32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ScaleMaskedFloat64x2",
+               name:    "Permute2Float64x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ScaleMaskedFloat64x4",
+               name:    "Permute2Float64x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ScaleMaskedFloat64x8",
+               name:    "Permute2Int8x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "SetHiFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiFloat32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiFloat64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiFloat64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiInt8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiInt8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiInt16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiInt16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiInt32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiInt32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiInt64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiInt64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiUint8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiUint8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiUint16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiUint16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiUint32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiUint32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiUint64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetHiUint64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoFloat32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoFloat32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoFloat64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoFloat64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoInt8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoInt8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoInt16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoInt16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoInt32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoInt32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoInt64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoInt64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoUint8x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoUint8x64",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoUint16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoUint16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoUint32x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoUint32x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoUint64x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "SetLoUint64x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftInt16x8",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftInt16x16",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftInt16x32",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftInt32x4",
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftInt32x8",
-               argLen:  2,
+               name:    "Permute2Int8x32",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftInt32x16",
-               argLen:  2,
+               name:    "Permute2Int8x64",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftInt64x2",
-               argLen:  2,
+               name:    "Permute2Int16x8",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftInt64x4",
-               argLen:  2,
+               name:    "Permute2Int16x16",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftInt64x8",
-               argLen:  2,
+               name:    "Permute2Int16x32",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt16x8",
+               name:    "Permute2Int32x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt16x16",
+               name:    "Permute2Int32x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt16x32",
+               name:    "Permute2Int32x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt32x4",
+               name:    "Permute2Int64x2",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt32x8",
+               name:    "Permute2Int64x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt32x16",
+               name:    "Permute2Int64x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt64x2",
+               name:    "Permute2Uint8x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt64x4",
+               name:    "Permute2Uint8x32",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedInt64x8",
+               name:    "Permute2Uint8x64",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint16x8",
+               name:    "Permute2Uint16x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint16x16",
+               name:    "Permute2Uint16x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint16x32",
+               name:    "Permute2Uint16x32",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint32x4",
+               name:    "Permute2Uint32x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint32x8",
+               name:    "Permute2Uint32x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint32x16",
+               name:    "Permute2Uint32x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint64x2",
+               name:    "Permute2Uint64x2",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint64x4",
+               name:    "Permute2Uint64x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftMaskedUint64x8",
+               name:    "Permute2Uint64x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint16x8",
+               name:    "PermuteFloat32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint16x16",
+               name:    "PermuteFloat32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint16x32",
+               name:    "PermuteFloat64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint32x4",
+               name:    "PermuteFloat64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint32x8",
+               name:    "PermuteInt8x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint32x16",
+               name:    "PermuteInt8x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint64x2",
+               name:    "PermuteInt8x64",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint64x4",
+               name:    "PermuteInt16x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllLeftUint64x8",
+               name:    "PermuteInt16x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt16x8",
+               name:    "PermuteInt16x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt16x16",
+               name:    "PermuteInt32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt16x32",
+               name:    "PermuteInt32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt32x4",
+               name:    "PermuteInt64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt32x8",
+               name:    "PermuteInt64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt32x16",
+               name:    "PermuteUint8x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt64x2",
+               name:    "PermuteUint8x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt64x4",
+               name:    "PermuteUint8x64",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightInt64x8",
+               name:    "PermuteUint16x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt16x8",
-               argLen:  3,
+               name:    "PermuteUint16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt16x16",
-               argLen:  3,
+               name:    "PermuteUint16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt16x32",
-               argLen:  3,
+               name:    "PermuteUint32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt32x4",
-               argLen:  3,
+               name:    "PermuteUint32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt32x8",
-               argLen:  3,
+               name:    "PermuteUint64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt32x16",
-               argLen:  3,
+               name:    "PermuteUint64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt64x2",
-               argLen:  3,
+               name:    "ReciprocalFloat32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt64x4",
-               argLen:  3,
+               name:    "ReciprocalFloat32x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedInt64x8",
-               argLen:  3,
+               name:    "ReciprocalFloat32x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint16x8",
-               argLen:  3,
+               name:    "ReciprocalFloat64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint16x16",
-               argLen:  3,
+               name:    "ReciprocalFloat64x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint16x32",
-               argLen:  3,
+               name:    "ReciprocalFloat64x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint32x4",
-               argLen:  3,
+               name:    "ReciprocalSqrtFloat32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint32x8",
-               argLen:  3,
+               name:    "ReciprocalSqrtFloat32x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint32x16",
-               argLen:  3,
+               name:    "ReciprocalSqrtFloat32x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint64x2",
-               argLen:  3,
+               name:    "ReciprocalSqrtFloat64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint64x4",
-               argLen:  3,
+               name:    "ReciprocalSqrtFloat64x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightMaskedUint64x8",
-               argLen:  3,
+               name:    "ReciprocalSqrtFloat64x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint16x8",
+               name:    "RotateLeftInt32x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint16x16",
+               name:    "RotateLeftInt32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint16x32",
+               name:    "RotateLeftInt32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint32x4",
+               name:    "RotateLeftInt64x2",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint32x8",
+               name:    "RotateLeftInt64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint32x16",
+               name:    "RotateLeftInt64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint64x2",
+               name:    "RotateLeftUint32x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint64x4",
+               name:    "RotateLeftUint32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftAllRightUint64x8",
+               name:    "RotateLeftUint32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatInt16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatInt16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatInt16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatInt64x2",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatInt64x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatMaskedInt16x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatMaskedInt16x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatMaskedInt16x32",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatMaskedInt32x4",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatMaskedInt32x8",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatMaskedInt32x16",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatMaskedInt64x2",
-               argLen:  4,
-               generic: true,
-       },
-       {
-               name:    "ShiftLeftConcatMaskedInt64x4",
-               argLen:  4,
+               name:    "RotateLeftUint64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedInt64x8",
-               argLen:  4,
+               name:    "RotateLeftUint64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint16x8",
-               argLen:  4,
+               name:    "RotateLeftUint64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint16x16",
-               argLen:  4,
+               name:    "RotateRightInt32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint16x32",
-               argLen:  4,
+               name:    "RotateRightInt32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint32x4",
-               argLen:  4,
+               name:    "RotateRightInt32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint32x8",
-               argLen:  4,
+               name:    "RotateRightInt64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint32x16",
-               argLen:  4,
+               name:    "RotateRightInt64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint64x2",
-               argLen:  4,
+               name:    "RotateRightInt64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint64x4",
-               argLen:  4,
+               name:    "RotateRightUint32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatMaskedUint64x8",
-               argLen:  4,
+               name:    "RotateRightUint32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint16x8",
-               argLen:  3,
+               name:    "RotateRightUint32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint16x16",
-               argLen:  3,
+               name:    "RotateRightUint64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint16x32",
-               argLen:  3,
+               name:    "RotateRightUint64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint32x4",
-               argLen:  3,
+               name:    "RotateRightUint64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint32x8",
-               argLen:  3,
+               name:    "RoundToEvenFloat32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint32x16",
-               argLen:  3,
+               name:    "RoundToEvenFloat32x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint64x2",
-               argLen:  3,
+               name:    "RoundToEvenFloat64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint64x4",
-               argLen:  3,
+               name:    "RoundToEvenFloat64x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "ShiftLeftConcatUint64x8",
-               argLen:  3,
+               name:    "ScaleFloat32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt16x8",
+               name:    "ScaleFloat32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt16x16",
+               name:    "ScaleFloat32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt16x32",
+               name:    "ScaleFloat64x2",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt32x4",
+               name:    "ScaleFloat64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt32x8",
+               name:    "ScaleFloat64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt32x16",
+               name:    "SetHiFloat32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt64x2",
+               name:    "SetHiFloat32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt64x4",
+               name:    "SetHiFloat64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftInt64x8",
+               name:    "SetHiFloat64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt16x8",
-               argLen:  3,
+               name:    "SetHiInt8x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt16x16",
-               argLen:  3,
+               name:    "SetHiInt8x64",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt16x32",
-               argLen:  3,
+               name:    "SetHiInt16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt32x4",
-               argLen:  3,
+               name:    "SetHiInt16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt32x8",
-               argLen:  3,
+               name:    "SetHiInt32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt32x16",
-               argLen:  3,
+               name:    "SetHiInt32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt64x2",
-               argLen:  3,
+               name:    "SetHiInt64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt64x4",
-               argLen:  3,
+               name:    "SetHiInt64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedInt64x8",
-               argLen:  3,
+               name:    "SetHiUint8x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint16x8",
-               argLen:  3,
+               name:    "SetHiUint8x64",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint16x16",
-               argLen:  3,
+               name:    "SetHiUint16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint16x32",
-               argLen:  3,
+               name:    "SetHiUint16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint32x4",
-               argLen:  3,
+               name:    "SetHiUint32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint32x8",
-               argLen:  3,
+               name:    "SetHiUint32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint32x16",
-               argLen:  3,
+               name:    "SetHiUint64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint64x2",
-               argLen:  3,
+               name:    "SetHiUint64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint64x4",
-               argLen:  3,
+               name:    "SetLoFloat32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftMaskedUint64x8",
-               argLen:  3,
+               name:    "SetLoFloat32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint16x8",
+               name:    "SetLoFloat64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint16x16",
+               name:    "SetLoFloat64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint16x32",
+               name:    "SetLoInt8x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint32x4",
+               name:    "SetLoInt8x64",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint32x8",
+               name:    "SetLoInt16x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint32x16",
+               name:    "SetLoInt16x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint64x2",
+               name:    "SetLoInt32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint64x4",
+               name:    "SetLoInt32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftLeftUint64x8",
+               name:    "SetLoInt64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt16x8",
-               argLen:  3,
+               name:    "SetLoInt64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt16x16",
-               argLen:  3,
+               name:    "SetLoUint8x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt16x32",
-               argLen:  3,
+               name:    "SetLoUint8x64",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt32x4",
-               argLen:  3,
+               name:    "SetLoUint16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt32x8",
-               argLen:  3,
+               name:    "SetLoUint16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt32x16",
-               argLen:  3,
+               name:    "SetLoUint32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt64x2",
-               argLen:  3,
+               name:    "SetLoUint32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt64x4",
-               argLen:  3,
+               name:    "SetLoUint64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatInt64x8",
-               argLen:  3,
+               name:    "SetLoUint64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt16x8",
-               argLen:  4,
+               name:    "ShiftAllLeftInt16x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt16x16",
-               argLen:  4,
+               name:    "ShiftAllLeftInt16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt16x32",
-               argLen:  4,
+               name:    "ShiftAllLeftInt16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt32x4",
-               argLen:  4,
+               name:    "ShiftAllLeftInt32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt32x8",
-               argLen:  4,
+               name:    "ShiftAllLeftInt32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt32x16",
-               argLen:  4,
+               name:    "ShiftAllLeftInt32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt64x2",
-               argLen:  4,
+               name:    "ShiftAllLeftInt64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt64x4",
-               argLen:  4,
+               name:    "ShiftAllLeftInt64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedInt64x8",
-               argLen:  4,
+               name:    "ShiftAllLeftInt64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint16x8",
-               argLen:  4,
+               name:    "ShiftAllLeftUint16x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint16x16",
-               argLen:  4,
+               name:    "ShiftAllLeftUint16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint16x32",
-               argLen:  4,
+               name:    "ShiftAllLeftUint16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint32x4",
-               argLen:  4,
+               name:    "ShiftAllLeftUint32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint32x8",
-               argLen:  4,
+               name:    "ShiftAllLeftUint32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint32x16",
-               argLen:  4,
+               name:    "ShiftAllLeftUint32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint64x2",
-               argLen:  4,
+               name:    "ShiftAllLeftUint64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint64x4",
-               argLen:  4,
+               name:    "ShiftAllLeftUint64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatMaskedUint64x8",
-               argLen:  4,
+               name:    "ShiftAllLeftUint64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint16x8",
-               argLen:  3,
+               name:    "ShiftAllRightInt16x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint16x16",
-               argLen:  3,
+               name:    "ShiftAllRightInt16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint16x32",
-               argLen:  3,
+               name:    "ShiftAllRightInt16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint32x4",
-               argLen:  3,
+               name:    "ShiftAllRightInt32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint32x8",
-               argLen:  3,
+               name:    "ShiftAllRightInt32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint32x16",
-               argLen:  3,
+               name:    "ShiftAllRightInt32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint64x2",
-               argLen:  3,
+               name:    "ShiftAllRightInt64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint64x4",
-               argLen:  3,
+               name:    "ShiftAllRightInt64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightConcatUint64x8",
-               argLen:  3,
+               name:    "ShiftAllRightInt64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt16x8",
+               name:    "ShiftAllRightUint16x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt16x16",
+               name:    "ShiftAllRightUint16x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt16x32",
+               name:    "ShiftAllRightUint16x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt32x4",
+               name:    "ShiftAllRightUint32x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt32x8",
+               name:    "ShiftAllRightUint32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt32x16",
+               name:    "ShiftAllRightUint32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt64x2",
+               name:    "ShiftAllRightUint64x2",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt64x4",
+               name:    "ShiftAllRightUint64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightInt64x8",
+               name:    "ShiftAllRightUint64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt16x8",
+               name:    "ShiftLeftConcatInt16x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt16x16",
+               name:    "ShiftLeftConcatInt16x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt16x32",
+               name:    "ShiftLeftConcatInt16x32",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt32x4",
+               name:    "ShiftLeftConcatInt32x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt32x8",
+               name:    "ShiftLeftConcatInt32x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt32x16",
+               name:    "ShiftLeftConcatInt32x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt64x2",
+               name:    "ShiftLeftConcatInt64x2",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt64x4",
+               name:    "ShiftLeftConcatInt64x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedInt64x8",
+               name:    "ShiftLeftConcatInt64x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint16x8",
+               name:    "ShiftLeftConcatUint16x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint16x16",
+               name:    "ShiftLeftConcatUint16x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint16x32",
+               name:    "ShiftLeftConcatUint16x32",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint32x4",
+               name:    "ShiftLeftConcatUint32x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint32x8",
+               name:    "ShiftLeftConcatUint32x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint32x16",
+               name:    "ShiftLeftConcatUint32x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint64x2",
+               name:    "ShiftLeftConcatUint64x2",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint64x4",
+               name:    "ShiftLeftConcatUint64x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightMaskedUint64x8",
+               name:    "ShiftLeftConcatUint64x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "ShiftRightUint16x8",
+               name:    "ShiftLeftInt16x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightUint16x16",
+               name:    "ShiftLeftInt16x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightUint16x32",
+               name:    "ShiftLeftInt16x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightUint32x4",
+               name:    "ShiftLeftInt32x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightUint32x8",
+               name:    "ShiftLeftInt32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightUint32x16",
+               name:    "ShiftLeftInt32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightUint64x2",
+               name:    "ShiftLeftInt64x2",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightUint64x4",
+               name:    "ShiftLeftInt64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "ShiftRightUint64x8",
+               name:    "ShiftLeftInt64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtFloat32x4",
-               argLen:  1,
+               name:    "ShiftLeftUint16x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtFloat32x8",
-               argLen:  1,
+               name:    "ShiftLeftUint16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtFloat32x16",
-               argLen:  1,
+               name:    "ShiftLeftUint16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtFloat64x2",
-               argLen:  1,
+               name:    "ShiftLeftUint32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtFloat64x4",
-               argLen:  1,
+               name:    "ShiftLeftUint32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtFloat64x8",
-               argLen:  1,
+               name:    "ShiftLeftUint32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtMaskedFloat32x4",
+               name:    "ShiftLeftUint64x2",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtMaskedFloat32x8",
+               name:    "ShiftLeftUint64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtMaskedFloat32x16",
+               name:    "ShiftLeftUint64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SqrtMaskedFloat64x2",
-               argLen:  2,
+               name:    "ShiftRightConcatInt16x8",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SqrtMaskedFloat64x4",
-               argLen:  2,
+               name:    "ShiftRightConcatInt16x16",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SqrtMaskedFloat64x8",
-               argLen:  2,
+               name:    "ShiftRightConcatInt16x32",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SubFloat32x4",
-               argLen:  2,
+               name:    "ShiftRightConcatInt32x4",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SubFloat32x8",
-               argLen:  2,
+               name:    "ShiftRightConcatInt32x8",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SubFloat32x16",
-               argLen:  2,
+               name:    "ShiftRightConcatInt32x16",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SubFloat64x2",
-               argLen:  2,
+               name:    "ShiftRightConcatInt64x2",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SubFloat64x4",
-               argLen:  2,
+               name:    "ShiftRightConcatInt64x4",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SubFloat64x8",
-               argLen:  2,
+               name:    "ShiftRightConcatInt64x8",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "SubInt8x16",
+               name:    "ShiftRightConcatUint16x8",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightConcatUint16x16",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightConcatUint16x32",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightConcatUint32x4",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightConcatUint32x8",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightConcatUint32x16",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightConcatUint64x2",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightConcatUint64x4",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightConcatUint64x8",
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ShiftRightInt16x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt8x32",
+               name:    "ShiftRightInt16x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt8x64",
+               name:    "ShiftRightInt16x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt16x8",
+               name:    "ShiftRightInt32x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt16x16",
+               name:    "ShiftRightInt32x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt16x32",
+               name:    "ShiftRightInt32x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt32x4",
+               name:    "ShiftRightInt64x2",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt32x8",
+               name:    "ShiftRightInt64x4",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt32x16",
+               name:    "ShiftRightInt64x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt64x2",
+               name:    "ShiftRightUint16x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt64x4",
+               name:    "ShiftRightUint16x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubInt64x8",
+               name:    "ShiftRightUint16x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedFloat32x4",
-               argLen:  3,
+               name:    "ShiftRightUint32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedFloat32x8",
-               argLen:  3,
+               name:    "ShiftRightUint32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedFloat32x16",
-               argLen:  3,
+               name:    "ShiftRightUint32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedFloat64x2",
-               argLen:  3,
+               name:    "ShiftRightUint64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedFloat64x4",
-               argLen:  3,
+               name:    "ShiftRightUint64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedFloat64x8",
-               argLen:  3,
+               name:    "ShiftRightUint64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedInt8x16",
-               argLen:  3,
+               name:    "SqrtFloat32x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "SubMaskedInt8x32",
-               argLen:  3,
+               name:    "SqrtFloat32x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "SubMaskedInt8x64",
-               argLen:  3,
+               name:    "SqrtFloat32x16",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "SubMaskedInt16x8",
-               argLen:  3,
+               name:    "SqrtFloat64x2",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "SubMaskedInt16x16",
-               argLen:  3,
+               name:    "SqrtFloat64x4",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "SubMaskedInt16x32",
-               argLen:  3,
+               name:    "SqrtFloat64x8",
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "SubMaskedInt32x4",
-               argLen:  3,
+               name:    "SubFloat32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedInt32x8",
-               argLen:  3,
+               name:    "SubFloat32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedInt32x16",
-               argLen:  3,
+               name:    "SubFloat32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedInt64x2",
-               argLen:  3,
+               name:    "SubFloat64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedInt64x4",
-               argLen:  3,
+               name:    "SubFloat64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedInt64x8",
-               argLen:  3,
+               name:    "SubFloat64x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint8x16",
-               argLen:  3,
+               name:    "SubInt8x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint8x32",
-               argLen:  3,
+               name:    "SubInt8x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint8x64",
-               argLen:  3,
+               name:    "SubInt8x64",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint16x8",
-               argLen:  3,
+               name:    "SubInt16x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint16x16",
-               argLen:  3,
+               name:    "SubInt16x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint16x32",
-               argLen:  3,
+               name:    "SubInt16x32",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint32x4",
-               argLen:  3,
+               name:    "SubInt32x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint32x8",
-               argLen:  3,
+               name:    "SubInt32x8",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint32x16",
-               argLen:  3,
+               name:    "SubInt32x16",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint64x2",
-               argLen:  3,
+               name:    "SubInt64x2",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint64x4",
-               argLen:  3,
+               name:    "SubInt64x4",
+               argLen:  2,
                generic: true,
        },
        {
-               name:    "SubMaskedUint64x8",
-               argLen:  3,
+               name:    "SubInt64x8",
+               argLen:  2,
                generic: true,
        },
        {
@@ -72146,66 +67496,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "SubSaturatedMaskedInt8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedInt8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedInt8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedInt16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedInt16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedInt16x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedUint8x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedUint8x32",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedUint8x64",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedUint16x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedUint16x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "SubSaturatedMaskedUint16x32",
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "SubSaturatedUint8x16",
                argLen:  2,
@@ -72388,78 +67678,6 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
-       {
-               name:        "XorMaskedInt32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedInt32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedInt32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedInt64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedInt64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedInt64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedUint32x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedUint32x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedUint32x16",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedUint64x2",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedUint64x4",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
-       {
-               name:        "XorMaskedUint64x8",
-               argLen:      3,
-               commutative: true,
-               generic:     true,
-       },
        {
                name:        "XorUint8x16",
                argLen:      2,
@@ -72553,319 +67771,157 @@ var opcodeTable = [...]opInfo{
                generic: true,
        },
        {
-               name:    "blendMaskedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "blendMaskedInt64x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledFloat32x4",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledFloat32x8",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledFloat32x16",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledFloat64x2",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledFloat64x4",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledFloat64x8",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledMaskedFloat32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledMaskedFloat32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledMaskedFloat32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledMaskedFloat64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledMaskedFloat64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledMaskedFloat64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueFloat32x4",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueFloat32x8",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueFloat32x16",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueFloat64x2",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueFloat64x4",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueFloat64x8",
-               auxType: auxUInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueMaskedFloat32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueMaskedFloat32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueMaskedFloat32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueMaskedFloat64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueMaskedFloat64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "CeilScaledResidueMaskedFloat64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "FloorScaledFloat32x4",
-               auxType: auxUInt8,
-               argLen:  1,
+               name:    "blendMaskedInt32x16",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "FloorScaledFloat32x8",
-               auxType: auxUInt8,
-               argLen:  1,
+               name:    "blendMaskedInt64x8",
+               argLen:  3,
                generic: true,
        },
        {
-               name:    "FloorScaledFloat32x16",
+               name:    "CeilScaledFloat32x4",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledFloat64x2",
+               name:    "CeilScaledFloat32x8",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledFloat64x4",
+               name:    "CeilScaledFloat32x16",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledFloat64x8",
+               name:    "CeilScaledFloat64x2",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledMaskedFloat32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "FloorScaledMaskedFloat32x8",
+               name:    "CeilScaledFloat64x4",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledMaskedFloat32x16",
+               name:    "CeilScaledFloat64x8",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledMaskedFloat64x2",
+               name:    "CeilScaledResidueFloat32x4",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledMaskedFloat64x4",
+               name:    "CeilScaledResidueFloat32x8",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledMaskedFloat64x8",
+               name:    "CeilScaledResidueFloat32x16",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueFloat32x4",
+               name:    "CeilScaledResidueFloat64x2",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueFloat32x8",
+               name:    "CeilScaledResidueFloat64x4",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueFloat32x16",
+               name:    "CeilScaledResidueFloat64x8",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueFloat64x2",
+               name:    "FloorScaledFloat32x4",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueFloat64x4",
+               name:    "FloorScaledFloat32x8",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueFloat64x8",
+               name:    "FloorScaledFloat32x16",
                auxType: auxUInt8,
                argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueMaskedFloat32x4",
+               name:    "FloorScaledFloat64x2",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueMaskedFloat32x8",
+               name:    "FloorScaledFloat64x4",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueMaskedFloat32x16",
+               name:    "FloorScaledFloat64x8",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueMaskedFloat64x2",
+               name:    "FloorScaledResidueFloat32x4",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueMaskedFloat64x4",
+               name:    "FloorScaledResidueFloat32x8",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "FloorScaledResidueMaskedFloat64x8",
+               name:    "FloorScaledResidueFloat32x16",
                auxType: auxUInt8,
-               argLen:  2,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "GaloisFieldAffineTransformInverseMaskedUint8x16",
+               name:    "FloorScaledResidueFloat64x2",
                auxType: auxUInt8,
-               argLen:  3,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "GaloisFieldAffineTransformInverseMaskedUint8x32",
+               name:    "FloorScaledResidueFloat64x4",
                auxType: auxUInt8,
-               argLen:  3,
+               argLen:  1,
                generic: true,
        },
        {
-               name:    "GaloisFieldAffineTransformInverseMaskedUint8x64",
+               name:    "FloorScaledResidueFloat64x8",
                auxType: auxUInt8,
-               argLen:  3,
+               argLen:  1,
                generic: true,
        },
        {
@@ -72886,24 +67942,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "GaloisFieldAffineTransformMaskedUint8x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GaloisFieldAffineTransformMaskedUint8x32",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "GaloisFieldAffineTransformMaskedUint8x64",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "GaloisFieldAffineTransformUint8x16",
                auxType: auxUInt8,
@@ -73018,78 +68056,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "RotateAllLeftMaskedInt32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedInt32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedInt32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedInt64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedInt64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedInt64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedUint32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedUint32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedUint32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedUint64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedUint64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllLeftMaskedUint64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
        {
                name:    "RotateAllLeftUint32x4",
                auxType: auxUInt8,
@@ -73162,78 +68128,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "RotateAllRightMaskedInt32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedInt32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedInt32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedInt64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedInt64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedInt64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedUint32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedUint32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedUint32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedUint64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedUint64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RotateAllRightMaskedUint64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
        {
                name:    "RotateAllRightUint32x4",
                auxType: auxUInt8,
@@ -73306,42 +68200,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "RoundToEvenScaledMaskedFloat32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledMaskedFloat32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledMaskedFloat32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledMaskedFloat64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledMaskedFloat64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledMaskedFloat64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
        {
                name:    "RoundToEvenScaledResidueFloat32x4",
                auxType: auxUInt8,
@@ -73378,42 +68236,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "RoundToEvenScaledResidueMaskedFloat32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledResidueMaskedFloat32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledResidueMaskedFloat32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledResidueMaskedFloat64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledResidueMaskedFloat64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "RoundToEvenScaledResidueMaskedFloat64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
        {
                name:    "SetElemFloat32x4",
                auxType: auxUInt8,
@@ -73528,114 +68350,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt16x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt16x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt16x32",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt32x4",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt32x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt32x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt64x2",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt64x4",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedInt64x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint16x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint16x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint16x32",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint32x4",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint32x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint32x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint64x2",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint64x4",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllLeftConcatMaskedUint64x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "ShiftAllLeftConcatUint16x8",
                auxType: auxUInt8,
@@ -73744,114 +68458,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "ShiftAllRightConcatMaskedInt16x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedInt16x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedInt16x32",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedInt32x4",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedInt32x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedInt32x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedInt64x2",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedInt64x4",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedInt64x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint16x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint16x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint16x32",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint32x4",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint32x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint32x16",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint64x2",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint64x4",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "ShiftAllRightConcatMaskedUint64x8",
-               auxType: auxUInt8,
-               argLen:  3,
-               generic: true,
-       },
        {
                name:    "ShiftAllRightConcatUint16x8",
                auxType: auxUInt8,
@@ -73942,42 +68548,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "TruncScaledMaskedFloat32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledMaskedFloat32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledMaskedFloat32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledMaskedFloat64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledMaskedFloat64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledMaskedFloat64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
        {
                name:    "TruncScaledResidueFloat32x4",
                auxType: auxUInt8,
@@ -74014,42 +68584,6 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
-       {
-               name:    "TruncScaledResidueMaskedFloat32x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledResidueMaskedFloat32x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledResidueMaskedFloat32x16",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledResidueMaskedFloat64x2",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledResidueMaskedFloat64x4",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "TruncScaledResidueMaskedFloat64x8",
-               auxType: auxUInt8,
-               argLen:  2,
-               generic: true,
-       },
 }
 
 func (o Op) Asm() obj.As          { return opcodeTable[o].asm }
index 69393014c78a85f21e282c812beccd5c36906996..87b1e0586d711be4708de05ef45ee487dafee822 100644 (file)
@@ -537,72 +537,36 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64VPSLLD256(v)
        case OpAMD64VPSLLD512:
                return rewriteValueAMD64_OpAMD64VPSLLD512(v)
-       case OpAMD64VPSLLDMasked128:
-               return rewriteValueAMD64_OpAMD64VPSLLDMasked128(v)
-       case OpAMD64VPSLLDMasked256:
-               return rewriteValueAMD64_OpAMD64VPSLLDMasked256(v)
-       case OpAMD64VPSLLDMasked512:
-               return rewriteValueAMD64_OpAMD64VPSLLDMasked512(v)
        case OpAMD64VPSLLQ128:
                return rewriteValueAMD64_OpAMD64VPSLLQ128(v)
        case OpAMD64VPSLLQ256:
                return rewriteValueAMD64_OpAMD64VPSLLQ256(v)
        case OpAMD64VPSLLQ512:
                return rewriteValueAMD64_OpAMD64VPSLLQ512(v)
-       case OpAMD64VPSLLQMasked128:
-               return rewriteValueAMD64_OpAMD64VPSLLQMasked128(v)
-       case OpAMD64VPSLLQMasked256:
-               return rewriteValueAMD64_OpAMD64VPSLLQMasked256(v)
-       case OpAMD64VPSLLQMasked512:
-               return rewriteValueAMD64_OpAMD64VPSLLQMasked512(v)
        case OpAMD64VPSLLW128:
                return rewriteValueAMD64_OpAMD64VPSLLW128(v)
        case OpAMD64VPSLLW256:
                return rewriteValueAMD64_OpAMD64VPSLLW256(v)
        case OpAMD64VPSLLW512:
                return rewriteValueAMD64_OpAMD64VPSLLW512(v)
-       case OpAMD64VPSLLWMasked128:
-               return rewriteValueAMD64_OpAMD64VPSLLWMasked128(v)
-       case OpAMD64VPSLLWMasked256:
-               return rewriteValueAMD64_OpAMD64VPSLLWMasked256(v)
-       case OpAMD64VPSLLWMasked512:
-               return rewriteValueAMD64_OpAMD64VPSLLWMasked512(v)
        case OpAMD64VPSRAD128:
                return rewriteValueAMD64_OpAMD64VPSRAD128(v)
        case OpAMD64VPSRAD256:
                return rewriteValueAMD64_OpAMD64VPSRAD256(v)
        case OpAMD64VPSRAD512:
                return rewriteValueAMD64_OpAMD64VPSRAD512(v)
-       case OpAMD64VPSRADMasked128:
-               return rewriteValueAMD64_OpAMD64VPSRADMasked128(v)
-       case OpAMD64VPSRADMasked256:
-               return rewriteValueAMD64_OpAMD64VPSRADMasked256(v)
-       case OpAMD64VPSRADMasked512:
-               return rewriteValueAMD64_OpAMD64VPSRADMasked512(v)
        case OpAMD64VPSRAQ128:
                return rewriteValueAMD64_OpAMD64VPSRAQ128(v)
        case OpAMD64VPSRAQ256:
                return rewriteValueAMD64_OpAMD64VPSRAQ256(v)
        case OpAMD64VPSRAQ512:
                return rewriteValueAMD64_OpAMD64VPSRAQ512(v)
-       case OpAMD64VPSRAQMasked128:
-               return rewriteValueAMD64_OpAMD64VPSRAQMasked128(v)
-       case OpAMD64VPSRAQMasked256:
-               return rewriteValueAMD64_OpAMD64VPSRAQMasked256(v)
-       case OpAMD64VPSRAQMasked512:
-               return rewriteValueAMD64_OpAMD64VPSRAQMasked512(v)
        case OpAMD64VPSRAW128:
                return rewriteValueAMD64_OpAMD64VPSRAW128(v)
        case OpAMD64VPSRAW256:
                return rewriteValueAMD64_OpAMD64VPSRAW256(v)
        case OpAMD64VPSRAW512:
                return rewriteValueAMD64_OpAMD64VPSRAW512(v)
-       case OpAMD64VPSRAWMasked128:
-               return rewriteValueAMD64_OpAMD64VPSRAWMasked128(v)
-       case OpAMD64VPSRAWMasked256:
-               return rewriteValueAMD64_OpAMD64VPSRAWMasked256(v)
-       case OpAMD64VPSRAWMasked512:
-               return rewriteValueAMD64_OpAMD64VPSRAWMasked512(v)
        case OpAMD64XADDLlock:
                return rewriteValueAMD64_OpAMD64XADDLlock(v)
        case OpAMD64XADDQlock:
@@ -667,30 +631,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAbsInt8x64:
                v.Op = OpAMD64VPABSB512
                return true
-       case OpAbsMaskedInt16x16:
-               return rewriteValueAMD64_OpAbsMaskedInt16x16(v)
-       case OpAbsMaskedInt16x32:
-               return rewriteValueAMD64_OpAbsMaskedInt16x32(v)
-       case OpAbsMaskedInt16x8:
-               return rewriteValueAMD64_OpAbsMaskedInt16x8(v)
-       case OpAbsMaskedInt32x16:
-               return rewriteValueAMD64_OpAbsMaskedInt32x16(v)
-       case OpAbsMaskedInt32x4:
-               return rewriteValueAMD64_OpAbsMaskedInt32x4(v)
-       case OpAbsMaskedInt32x8:
-               return rewriteValueAMD64_OpAbsMaskedInt32x8(v)
-       case OpAbsMaskedInt64x2:
-               return rewriteValueAMD64_OpAbsMaskedInt64x2(v)
-       case OpAbsMaskedInt64x4:
-               return rewriteValueAMD64_OpAbsMaskedInt64x4(v)
-       case OpAbsMaskedInt64x8:
-               return rewriteValueAMD64_OpAbsMaskedInt64x8(v)
-       case OpAbsMaskedInt8x16:
-               return rewriteValueAMD64_OpAbsMaskedInt8x16(v)
-       case OpAbsMaskedInt8x32:
-               return rewriteValueAMD64_OpAbsMaskedInt8x32(v)
-       case OpAbsMaskedInt8x64:
-               return rewriteValueAMD64_OpAbsMaskedInt8x64(v)
        case OpAdd16:
                v.Op = OpAMD64ADDL
                return true
@@ -718,12 +658,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAddDotProdPairsSaturatedInt32x8:
                v.Op = OpAMD64VPDPWSSDS256
                return true
-       case OpAddDotProdPairsSaturatedMaskedInt32x16:
-               return rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x16(v)
-       case OpAddDotProdPairsSaturatedMaskedInt32x4:
-               return rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x4(v)
-       case OpAddDotProdPairsSaturatedMaskedInt32x8:
-               return rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x8(v)
        case OpAddDotProdQuadrupleInt32x16:
                v.Op = OpAMD64VPDPBUSD512
                return true
@@ -733,12 +667,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAddDotProdQuadrupleInt32x8:
                v.Op = OpAMD64VPDPBUSD256
                return true
-       case OpAddDotProdQuadrupleMaskedInt32x16:
-               return rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x16(v)
-       case OpAddDotProdQuadrupleMaskedInt32x4:
-               return rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x4(v)
-       case OpAddDotProdQuadrupleMaskedInt32x8:
-               return rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x8(v)
        case OpAddDotProdQuadrupleSaturatedInt32x16:
                v.Op = OpAMD64VPDPBUSDS512
                return true
@@ -748,12 +676,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAddDotProdQuadrupleSaturatedInt32x8:
                v.Op = OpAMD64VPDPBUSDS256
                return true
-       case OpAddDotProdQuadrupleSaturatedMaskedInt32x16:
-               return rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x16(v)
-       case OpAddDotProdQuadrupleSaturatedMaskedInt32x4:
-               return rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x4(v)
-       case OpAddDotProdQuadrupleSaturatedMaskedInt32x8:
-               return rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x8(v)
        case OpAddFloat32x16:
                v.Op = OpAMD64VADDPS512
                return true
@@ -808,66 +730,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAddInt8x64:
                v.Op = OpAMD64VPADDB512
                return true
-       case OpAddMaskedFloat32x16:
-               return rewriteValueAMD64_OpAddMaskedFloat32x16(v)
-       case OpAddMaskedFloat32x4:
-               return rewriteValueAMD64_OpAddMaskedFloat32x4(v)
-       case OpAddMaskedFloat32x8:
-               return rewriteValueAMD64_OpAddMaskedFloat32x8(v)
-       case OpAddMaskedFloat64x2:
-               return rewriteValueAMD64_OpAddMaskedFloat64x2(v)
-       case OpAddMaskedFloat64x4:
-               return rewriteValueAMD64_OpAddMaskedFloat64x4(v)
-       case OpAddMaskedFloat64x8:
-               return rewriteValueAMD64_OpAddMaskedFloat64x8(v)
-       case OpAddMaskedInt16x16:
-               return rewriteValueAMD64_OpAddMaskedInt16x16(v)
-       case OpAddMaskedInt16x32:
-               return rewriteValueAMD64_OpAddMaskedInt16x32(v)
-       case OpAddMaskedInt16x8:
-               return rewriteValueAMD64_OpAddMaskedInt16x8(v)
-       case OpAddMaskedInt32x16:
-               return rewriteValueAMD64_OpAddMaskedInt32x16(v)
-       case OpAddMaskedInt32x4:
-               return rewriteValueAMD64_OpAddMaskedInt32x4(v)
-       case OpAddMaskedInt32x8:
-               return rewriteValueAMD64_OpAddMaskedInt32x8(v)
-       case OpAddMaskedInt64x2:
-               return rewriteValueAMD64_OpAddMaskedInt64x2(v)
-       case OpAddMaskedInt64x4:
-               return rewriteValueAMD64_OpAddMaskedInt64x4(v)
-       case OpAddMaskedInt64x8:
-               return rewriteValueAMD64_OpAddMaskedInt64x8(v)
-       case OpAddMaskedInt8x16:
-               return rewriteValueAMD64_OpAddMaskedInt8x16(v)
-       case OpAddMaskedInt8x32:
-               return rewriteValueAMD64_OpAddMaskedInt8x32(v)
-       case OpAddMaskedInt8x64:
-               return rewriteValueAMD64_OpAddMaskedInt8x64(v)
-       case OpAddMaskedUint16x16:
-               return rewriteValueAMD64_OpAddMaskedUint16x16(v)
-       case OpAddMaskedUint16x32:
-               return rewriteValueAMD64_OpAddMaskedUint16x32(v)
-       case OpAddMaskedUint16x8:
-               return rewriteValueAMD64_OpAddMaskedUint16x8(v)
-       case OpAddMaskedUint32x16:
-               return rewriteValueAMD64_OpAddMaskedUint32x16(v)
-       case OpAddMaskedUint32x4:
-               return rewriteValueAMD64_OpAddMaskedUint32x4(v)
-       case OpAddMaskedUint32x8:
-               return rewriteValueAMD64_OpAddMaskedUint32x8(v)
-       case OpAddMaskedUint64x2:
-               return rewriteValueAMD64_OpAddMaskedUint64x2(v)
-       case OpAddMaskedUint64x4:
-               return rewriteValueAMD64_OpAddMaskedUint64x4(v)
-       case OpAddMaskedUint64x8:
-               return rewriteValueAMD64_OpAddMaskedUint64x8(v)
-       case OpAddMaskedUint8x16:
-               return rewriteValueAMD64_OpAddMaskedUint8x16(v)
-       case OpAddMaskedUint8x32:
-               return rewriteValueAMD64_OpAddMaskedUint8x32(v)
-       case OpAddMaskedUint8x64:
-               return rewriteValueAMD64_OpAddMaskedUint8x64(v)
        case OpAddPairsFloat32x4:
                v.Op = OpAMD64VHADDPS128
                return true
@@ -931,30 +793,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAddSaturatedInt8x64:
                v.Op = OpAMD64VPADDSB512
                return true
-       case OpAddSaturatedMaskedInt16x16:
-               return rewriteValueAMD64_OpAddSaturatedMaskedInt16x16(v)
-       case OpAddSaturatedMaskedInt16x32:
-               return rewriteValueAMD64_OpAddSaturatedMaskedInt16x32(v)
-       case OpAddSaturatedMaskedInt16x8:
-               return rewriteValueAMD64_OpAddSaturatedMaskedInt16x8(v)
-       case OpAddSaturatedMaskedInt8x16:
-               return rewriteValueAMD64_OpAddSaturatedMaskedInt8x16(v)
-       case OpAddSaturatedMaskedInt8x32:
-               return rewriteValueAMD64_OpAddSaturatedMaskedInt8x32(v)
-       case OpAddSaturatedMaskedInt8x64:
-               return rewriteValueAMD64_OpAddSaturatedMaskedInt8x64(v)
-       case OpAddSaturatedMaskedUint16x16:
-               return rewriteValueAMD64_OpAddSaturatedMaskedUint16x16(v)
-       case OpAddSaturatedMaskedUint16x32:
-               return rewriteValueAMD64_OpAddSaturatedMaskedUint16x32(v)
-       case OpAddSaturatedMaskedUint16x8:
-               return rewriteValueAMD64_OpAddSaturatedMaskedUint16x8(v)
-       case OpAddSaturatedMaskedUint8x16:
-               return rewriteValueAMD64_OpAddSaturatedMaskedUint8x16(v)
-       case OpAddSaturatedMaskedUint8x32:
-               return rewriteValueAMD64_OpAddSaturatedMaskedUint8x32(v)
-       case OpAddSaturatedMaskedUint8x64:
-               return rewriteValueAMD64_OpAddSaturatedMaskedUint8x64(v)
        case OpAddSaturatedUint16x16:
                v.Op = OpAMD64VPADDUSW256
                return true
@@ -1074,30 +912,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAndInt8x64:
                v.Op = OpAMD64VPANDD512
                return true
-       case OpAndMaskedInt32x16:
-               return rewriteValueAMD64_OpAndMaskedInt32x16(v)
-       case OpAndMaskedInt32x4:
-               return rewriteValueAMD64_OpAndMaskedInt32x4(v)
-       case OpAndMaskedInt32x8:
-               return rewriteValueAMD64_OpAndMaskedInt32x8(v)
-       case OpAndMaskedInt64x2:
-               return rewriteValueAMD64_OpAndMaskedInt64x2(v)
-       case OpAndMaskedInt64x4:
-               return rewriteValueAMD64_OpAndMaskedInt64x4(v)
-       case OpAndMaskedInt64x8:
-               return rewriteValueAMD64_OpAndMaskedInt64x8(v)
-       case OpAndMaskedUint32x16:
-               return rewriteValueAMD64_OpAndMaskedUint32x16(v)
-       case OpAndMaskedUint32x4:
-               return rewriteValueAMD64_OpAndMaskedUint32x4(v)
-       case OpAndMaskedUint32x8:
-               return rewriteValueAMD64_OpAndMaskedUint32x8(v)
-       case OpAndMaskedUint64x2:
-               return rewriteValueAMD64_OpAndMaskedUint64x2(v)
-       case OpAndMaskedUint64x4:
-               return rewriteValueAMD64_OpAndMaskedUint64x4(v)
-       case OpAndMaskedUint64x8:
-               return rewriteValueAMD64_OpAndMaskedUint64x8(v)
        case OpAndNotInt16x16:
                v.Op = OpAMD64VPANDN256
                return true
@@ -1134,30 +948,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAndNotInt8x64:
                v.Op = OpAMD64VPANDND512
                return true
-       case OpAndNotMaskedInt32x16:
-               return rewriteValueAMD64_OpAndNotMaskedInt32x16(v)
-       case OpAndNotMaskedInt32x4:
-               return rewriteValueAMD64_OpAndNotMaskedInt32x4(v)
-       case OpAndNotMaskedInt32x8:
-               return rewriteValueAMD64_OpAndNotMaskedInt32x8(v)
-       case OpAndNotMaskedInt64x2:
-               return rewriteValueAMD64_OpAndNotMaskedInt64x2(v)
-       case OpAndNotMaskedInt64x4:
-               return rewriteValueAMD64_OpAndNotMaskedInt64x4(v)
-       case OpAndNotMaskedInt64x8:
-               return rewriteValueAMD64_OpAndNotMaskedInt64x8(v)
-       case OpAndNotMaskedUint32x16:
-               return rewriteValueAMD64_OpAndNotMaskedUint32x16(v)
-       case OpAndNotMaskedUint32x4:
-               return rewriteValueAMD64_OpAndNotMaskedUint32x4(v)
-       case OpAndNotMaskedUint32x8:
-               return rewriteValueAMD64_OpAndNotMaskedUint32x8(v)
-       case OpAndNotMaskedUint64x2:
-               return rewriteValueAMD64_OpAndNotMaskedUint64x2(v)
-       case OpAndNotMaskedUint64x4:
-               return rewriteValueAMD64_OpAndNotMaskedUint64x4(v)
-       case OpAndNotMaskedUint64x8:
-               return rewriteValueAMD64_OpAndNotMaskedUint64x8(v)
        case OpAndNotUint16x16:
                v.Op = OpAMD64VPANDN256
                return true
@@ -1276,18 +1066,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAtomicStore8(v)
        case OpAtomicStorePtrNoWB:
                return rewriteValueAMD64_OpAtomicStorePtrNoWB(v)
-       case OpAverageMaskedUint16x16:
-               return rewriteValueAMD64_OpAverageMaskedUint16x16(v)
-       case OpAverageMaskedUint16x32:
-               return rewriteValueAMD64_OpAverageMaskedUint16x32(v)
-       case OpAverageMaskedUint16x8:
-               return rewriteValueAMD64_OpAverageMaskedUint16x8(v)
-       case OpAverageMaskedUint8x16:
-               return rewriteValueAMD64_OpAverageMaskedUint8x16(v)
-       case OpAverageMaskedUint8x32:
-               return rewriteValueAMD64_OpAverageMaskedUint8x32(v)
-       case OpAverageMaskedUint8x64:
-               return rewriteValueAMD64_OpAverageMaskedUint8x64(v)
        case OpAverageUint16x16:
                v.Op = OpAMD64VPAVGW256
                return true
@@ -1335,26 +1113,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpBroadcast128Int8x16:
                v.Op = OpAMD64VPBROADCASTB128
                return true
-       case OpBroadcast128MaskedFloat32x4:
-               return rewriteValueAMD64_OpBroadcast128MaskedFloat32x4(v)
-       case OpBroadcast128MaskedFloat64x2:
-               return rewriteValueAMD64_OpBroadcast128MaskedFloat64x2(v)
-       case OpBroadcast128MaskedInt16x8:
-               return rewriteValueAMD64_OpBroadcast128MaskedInt16x8(v)
-       case OpBroadcast128MaskedInt32x4:
-               return rewriteValueAMD64_OpBroadcast128MaskedInt32x4(v)
-       case OpBroadcast128MaskedInt64x2:
-               return rewriteValueAMD64_OpBroadcast128MaskedInt64x2(v)
-       case OpBroadcast128MaskedInt8x16:
-               return rewriteValueAMD64_OpBroadcast128MaskedInt8x16(v)
-       case OpBroadcast128MaskedUint16x8:
-               return rewriteValueAMD64_OpBroadcast128MaskedUint16x8(v)
-       case OpBroadcast128MaskedUint32x4:
-               return rewriteValueAMD64_OpBroadcast128MaskedUint32x4(v)
-       case OpBroadcast128MaskedUint64x2:
-               return rewriteValueAMD64_OpBroadcast128MaskedUint64x2(v)
-       case OpBroadcast128MaskedUint8x16:
-               return rewriteValueAMD64_OpBroadcast128MaskedUint8x16(v)
        case OpBroadcast128Uint16x8:
                v.Op = OpAMD64VPBROADCASTW128
                return true
@@ -1385,26 +1143,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpBroadcast256Int8x16:
                v.Op = OpAMD64VPBROADCASTB256
                return true
-       case OpBroadcast256MaskedFloat32x4:
-               return rewriteValueAMD64_OpBroadcast256MaskedFloat32x4(v)
-       case OpBroadcast256MaskedFloat64x2:
-               return rewriteValueAMD64_OpBroadcast256MaskedFloat64x2(v)
-       case OpBroadcast256MaskedInt16x8:
-               return rewriteValueAMD64_OpBroadcast256MaskedInt16x8(v)
-       case OpBroadcast256MaskedInt32x4:
-               return rewriteValueAMD64_OpBroadcast256MaskedInt32x4(v)
-       case OpBroadcast256MaskedInt64x2:
-               return rewriteValueAMD64_OpBroadcast256MaskedInt64x2(v)
-       case OpBroadcast256MaskedInt8x16:
-               return rewriteValueAMD64_OpBroadcast256MaskedInt8x16(v)
-       case OpBroadcast256MaskedUint16x8:
-               return rewriteValueAMD64_OpBroadcast256MaskedUint16x8(v)
-       case OpBroadcast256MaskedUint32x4:
-               return rewriteValueAMD64_OpBroadcast256MaskedUint32x4(v)
-       case OpBroadcast256MaskedUint64x2:
-               return rewriteValueAMD64_OpBroadcast256MaskedUint64x2(v)
-       case OpBroadcast256MaskedUint8x16:
-               return rewriteValueAMD64_OpBroadcast256MaskedUint8x16(v)
        case OpBroadcast256Uint16x8:
                v.Op = OpAMD64VPBROADCASTW256
                return true
@@ -1435,26 +1173,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpBroadcast512Int8x16:
                v.Op = OpAMD64VPBROADCASTB512
                return true
-       case OpBroadcast512MaskedFloat32x4:
-               return rewriteValueAMD64_OpBroadcast512MaskedFloat32x4(v)
-       case OpBroadcast512MaskedFloat64x2:
-               return rewriteValueAMD64_OpBroadcast512MaskedFloat64x2(v)
-       case OpBroadcast512MaskedInt16x8:
-               return rewriteValueAMD64_OpBroadcast512MaskedInt16x8(v)
-       case OpBroadcast512MaskedInt32x4:
-               return rewriteValueAMD64_OpBroadcast512MaskedInt32x4(v)
-       case OpBroadcast512MaskedInt64x2:
-               return rewriteValueAMD64_OpBroadcast512MaskedInt64x2(v)
-       case OpBroadcast512MaskedInt8x16:
-               return rewriteValueAMD64_OpBroadcast512MaskedInt8x16(v)
-       case OpBroadcast512MaskedUint16x8:
-               return rewriteValueAMD64_OpBroadcast512MaskedUint16x8(v)
-       case OpBroadcast512MaskedUint32x4:
-               return rewriteValueAMD64_OpBroadcast512MaskedUint32x4(v)
-       case OpBroadcast512MaskedUint64x2:
-               return rewriteValueAMD64_OpBroadcast512MaskedUint64x2(v)
-       case OpBroadcast512MaskedUint8x16:
-               return rewriteValueAMD64_OpBroadcast512MaskedUint8x16(v)
        case OpBroadcast512Uint16x8:
                v.Op = OpAMD64VPBROADCASTW512
                return true
@@ -1497,18 +1215,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpCeilScaledFloat64x4(v)
        case OpCeilScaledFloat64x8:
                return rewriteValueAMD64_OpCeilScaledFloat64x8(v)
-       case OpCeilScaledMaskedFloat32x16:
-               return rewriteValueAMD64_OpCeilScaledMaskedFloat32x16(v)
-       case OpCeilScaledMaskedFloat32x4:
-               return rewriteValueAMD64_OpCeilScaledMaskedFloat32x4(v)
-       case OpCeilScaledMaskedFloat32x8:
-               return rewriteValueAMD64_OpCeilScaledMaskedFloat32x8(v)
-       case OpCeilScaledMaskedFloat64x2:
-               return rewriteValueAMD64_OpCeilScaledMaskedFloat64x2(v)
-       case OpCeilScaledMaskedFloat64x4:
-               return rewriteValueAMD64_OpCeilScaledMaskedFloat64x4(v)
-       case OpCeilScaledMaskedFloat64x8:
-               return rewriteValueAMD64_OpCeilScaledMaskedFloat64x8(v)
        case OpCeilScaledResidueFloat32x16:
                return rewriteValueAMD64_OpCeilScaledResidueFloat32x16(v)
        case OpCeilScaledResidueFloat32x4:
@@ -1521,18 +1227,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpCeilScaledResidueFloat64x4(v)
        case OpCeilScaledResidueFloat64x8:
                return rewriteValueAMD64_OpCeilScaledResidueFloat64x8(v)
-       case OpCeilScaledResidueMaskedFloat32x16:
-               return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x16(v)
-       case OpCeilScaledResidueMaskedFloat32x4:
-               return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x4(v)
-       case OpCeilScaledResidueMaskedFloat32x8:
-               return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x8(v)
-       case OpCeilScaledResidueMaskedFloat64x2:
-               return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x2(v)
-       case OpCeilScaledResidueMaskedFloat64x4:
-               return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x4(v)
-       case OpCeilScaledResidueMaskedFloat64x8:
-               return rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x8(v)
        case OpClosureCall:
                v.Op = OpAMD64CALLclosure
                return true
@@ -1639,12 +1333,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpConvertToInt32Float32x8:
                v.Op = OpAMD64VCVTTPS2DQ256
                return true
-       case OpConvertToInt32MaskedFloat32x16:
-               return rewriteValueAMD64_OpConvertToInt32MaskedFloat32x16(v)
-       case OpConvertToInt32MaskedFloat32x4:
-               return rewriteValueAMD64_OpConvertToInt32MaskedFloat32x4(v)
-       case OpConvertToInt32MaskedFloat32x8:
-               return rewriteValueAMD64_OpConvertToInt32MaskedFloat32x8(v)
        case OpConvertToUint32Float32x16:
                v.Op = OpAMD64VCVTPS2UDQ512
                return true
@@ -1654,12 +1342,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpConvertToUint32Float32x8:
                v.Op = OpAMD64VCVTPS2UDQ256
                return true
-       case OpConvertToUint32MaskedFloat32x16:
-               return rewriteValueAMD64_OpConvertToUint32MaskedFloat32x16(v)
-       case OpConvertToUint32MaskedFloat32x4:
-               return rewriteValueAMD64_OpConvertToUint32MaskedFloat32x4(v)
-       case OpConvertToUint32MaskedFloat32x8:
-               return rewriteValueAMD64_OpConvertToUint32MaskedFloat32x8(v)
        case OpCopySignInt16x16:
                v.Op = OpAMD64VPSIGNW256
                return true
@@ -1818,18 +1500,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpDivFloat64x8:
                v.Op = OpAMD64VDIVPD512
                return true
-       case OpDivMaskedFloat32x16:
-               return rewriteValueAMD64_OpDivMaskedFloat32x16(v)
-       case OpDivMaskedFloat32x4:
-               return rewriteValueAMD64_OpDivMaskedFloat32x4(v)
-       case OpDivMaskedFloat32x8:
-               return rewriteValueAMD64_OpDivMaskedFloat32x8(v)
-       case OpDivMaskedFloat64x2:
-               return rewriteValueAMD64_OpDivMaskedFloat64x2(v)
-       case OpDivMaskedFloat64x4:
-               return rewriteValueAMD64_OpDivMaskedFloat64x4(v)
-       case OpDivMaskedFloat64x8:
-               return rewriteValueAMD64_OpDivMaskedFloat64x8(v)
        case OpDotProdPairsInt16x16:
                v.Op = OpAMD64VPMADDWD256
                return true
@@ -1839,18 +1509,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpDotProdPairsInt16x8:
                v.Op = OpAMD64VPMADDWD128
                return true
-       case OpDotProdPairsMaskedInt16x16:
-               return rewriteValueAMD64_OpDotProdPairsMaskedInt16x16(v)
-       case OpDotProdPairsMaskedInt16x32:
-               return rewriteValueAMD64_OpDotProdPairsMaskedInt16x32(v)
-       case OpDotProdPairsMaskedInt16x8:
-               return rewriteValueAMD64_OpDotProdPairsMaskedInt16x8(v)
-       case OpDotProdPairsSaturatedMaskedUint8x16:
-               return rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x16(v)
-       case OpDotProdPairsSaturatedMaskedUint8x32:
-               return rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x32(v)
-       case OpDotProdPairsSaturatedMaskedUint8x64:
-               return rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x64(v)
        case OpDotProdPairsSaturatedUint8x16:
                v.Op = OpAMD64VPMADDUBSW128
                return true
@@ -1920,66 +1578,6 @@ func rewriteValueAMD64(v *Value) bool {
                return true
        case OpEqualInt8x64:
                return rewriteValueAMD64_OpEqualInt8x64(v)
-       case OpEqualMaskedFloat32x16:
-               return rewriteValueAMD64_OpEqualMaskedFloat32x16(v)
-       case OpEqualMaskedFloat32x4:
-               return rewriteValueAMD64_OpEqualMaskedFloat32x4(v)
-       case OpEqualMaskedFloat32x8:
-               return rewriteValueAMD64_OpEqualMaskedFloat32x8(v)
-       case OpEqualMaskedFloat64x2:
-               return rewriteValueAMD64_OpEqualMaskedFloat64x2(v)
-       case OpEqualMaskedFloat64x4:
-               return rewriteValueAMD64_OpEqualMaskedFloat64x4(v)
-       case OpEqualMaskedFloat64x8:
-               return rewriteValueAMD64_OpEqualMaskedFloat64x8(v)
-       case OpEqualMaskedInt16x16:
-               return rewriteValueAMD64_OpEqualMaskedInt16x16(v)
-       case OpEqualMaskedInt16x32:
-               return rewriteValueAMD64_OpEqualMaskedInt16x32(v)
-       case OpEqualMaskedInt16x8:
-               return rewriteValueAMD64_OpEqualMaskedInt16x8(v)
-       case OpEqualMaskedInt32x16:
-               return rewriteValueAMD64_OpEqualMaskedInt32x16(v)
-       case OpEqualMaskedInt32x4:
-               return rewriteValueAMD64_OpEqualMaskedInt32x4(v)
-       case OpEqualMaskedInt32x8:
-               return rewriteValueAMD64_OpEqualMaskedInt32x8(v)
-       case OpEqualMaskedInt64x2:
-               return rewriteValueAMD64_OpEqualMaskedInt64x2(v)
-       case OpEqualMaskedInt64x4:
-               return rewriteValueAMD64_OpEqualMaskedInt64x4(v)
-       case OpEqualMaskedInt64x8:
-               return rewriteValueAMD64_OpEqualMaskedInt64x8(v)
-       case OpEqualMaskedInt8x16:
-               return rewriteValueAMD64_OpEqualMaskedInt8x16(v)
-       case OpEqualMaskedInt8x32:
-               return rewriteValueAMD64_OpEqualMaskedInt8x32(v)
-       case OpEqualMaskedInt8x64:
-               return rewriteValueAMD64_OpEqualMaskedInt8x64(v)
-       case OpEqualMaskedUint16x16:
-               return rewriteValueAMD64_OpEqualMaskedUint16x16(v)
-       case OpEqualMaskedUint16x32:
-               return rewriteValueAMD64_OpEqualMaskedUint16x32(v)
-       case OpEqualMaskedUint16x8:
-               return rewriteValueAMD64_OpEqualMaskedUint16x8(v)
-       case OpEqualMaskedUint32x16:
-               return rewriteValueAMD64_OpEqualMaskedUint32x16(v)
-       case OpEqualMaskedUint32x4:
-               return rewriteValueAMD64_OpEqualMaskedUint32x4(v)
-       case OpEqualMaskedUint32x8:
-               return rewriteValueAMD64_OpEqualMaskedUint32x8(v)
-       case OpEqualMaskedUint64x2:
-               return rewriteValueAMD64_OpEqualMaskedUint64x2(v)
-       case OpEqualMaskedUint64x4:
-               return rewriteValueAMD64_OpEqualMaskedUint64x4(v)
-       case OpEqualMaskedUint64x8:
-               return rewriteValueAMD64_OpEqualMaskedUint64x8(v)
-       case OpEqualMaskedUint8x16:
-               return rewriteValueAMD64_OpEqualMaskedUint8x16(v)
-       case OpEqualMaskedUint8x32:
-               return rewriteValueAMD64_OpEqualMaskedUint8x32(v)
-       case OpEqualMaskedUint8x64:
-               return rewriteValueAMD64_OpEqualMaskedUint8x64(v)
        case OpEqualUint16x16:
                v.Op = OpAMD64VPCMPEQW256
                return true
@@ -2096,18 +1694,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpFloorScaledFloat64x4(v)
        case OpFloorScaledFloat64x8:
                return rewriteValueAMD64_OpFloorScaledFloat64x8(v)
-       case OpFloorScaledMaskedFloat32x16:
-               return rewriteValueAMD64_OpFloorScaledMaskedFloat32x16(v)
-       case OpFloorScaledMaskedFloat32x4:
-               return rewriteValueAMD64_OpFloorScaledMaskedFloat32x4(v)
-       case OpFloorScaledMaskedFloat32x8:
-               return rewriteValueAMD64_OpFloorScaledMaskedFloat32x8(v)
-       case OpFloorScaledMaskedFloat64x2:
-               return rewriteValueAMD64_OpFloorScaledMaskedFloat64x2(v)
-       case OpFloorScaledMaskedFloat64x4:
-               return rewriteValueAMD64_OpFloorScaledMaskedFloat64x4(v)
-       case OpFloorScaledMaskedFloat64x8:
-               return rewriteValueAMD64_OpFloorScaledMaskedFloat64x8(v)
        case OpFloorScaledResidueFloat32x16:
                return rewriteValueAMD64_OpFloorScaledResidueFloat32x16(v)
        case OpFloorScaledResidueFloat32x4:
@@ -2120,24 +1706,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpFloorScaledResidueFloat64x4(v)
        case OpFloorScaledResidueFloat64x8:
                return rewriteValueAMD64_OpFloorScaledResidueFloat64x8(v)
-       case OpFloorScaledResidueMaskedFloat32x16:
-               return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x16(v)
-       case OpFloorScaledResidueMaskedFloat32x4:
-               return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x4(v)
-       case OpFloorScaledResidueMaskedFloat32x8:
-               return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x8(v)
-       case OpFloorScaledResidueMaskedFloat64x2:
-               return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x2(v)
-       case OpFloorScaledResidueMaskedFloat64x4:
-               return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x4(v)
-       case OpFloorScaledResidueMaskedFloat64x8:
-               return rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x8(v)
-       case OpGaloisFieldAffineTransformInverseMaskedUint8x16:
-               return rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x16(v)
-       case OpGaloisFieldAffineTransformInverseMaskedUint8x32:
-               return rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x32(v)
-       case OpGaloisFieldAffineTransformInverseMaskedUint8x64:
-               return rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x64(v)
        case OpGaloisFieldAffineTransformInverseUint8x16:
                v.Op = OpAMD64VGF2P8AFFINEINVQB128
                return true
@@ -2147,12 +1715,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpGaloisFieldAffineTransformInverseUint8x64:
                v.Op = OpAMD64VGF2P8AFFINEINVQB512
                return true
-       case OpGaloisFieldAffineTransformMaskedUint8x16:
-               return rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x16(v)
-       case OpGaloisFieldAffineTransformMaskedUint8x32:
-               return rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x32(v)
-       case OpGaloisFieldAffineTransformMaskedUint8x64:
-               return rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x64(v)
        case OpGaloisFieldAffineTransformUint8x16:
                v.Op = OpAMD64VGF2P8AFFINEQB128
                return true
@@ -2162,12 +1724,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpGaloisFieldAffineTransformUint8x64:
                v.Op = OpAMD64VGF2P8AFFINEQB512
                return true
-       case OpGaloisFieldMulMaskedUint8x16:
-               return rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x16(v)
-       case OpGaloisFieldMulMaskedUint8x32:
-               return rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x32(v)
-       case OpGaloisFieldMulMaskedUint8x64:
-               return rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x64(v)
        case OpGaloisFieldMulUint8x16:
                v.Op = OpAMD64VGF2P8MULB128
                return true
@@ -2318,66 +1874,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpGreaterEqualInt64x8(v)
        case OpGreaterEqualInt8x64:
                return rewriteValueAMD64_OpGreaterEqualInt8x64(v)
-       case OpGreaterEqualMaskedFloat32x16:
-               return rewriteValueAMD64_OpGreaterEqualMaskedFloat32x16(v)
-       case OpGreaterEqualMaskedFloat32x4:
-               return rewriteValueAMD64_OpGreaterEqualMaskedFloat32x4(v)
-       case OpGreaterEqualMaskedFloat32x8:
-               return rewriteValueAMD64_OpGreaterEqualMaskedFloat32x8(v)
-       case OpGreaterEqualMaskedFloat64x2:
-               return rewriteValueAMD64_OpGreaterEqualMaskedFloat64x2(v)
-       case OpGreaterEqualMaskedFloat64x4:
-               return rewriteValueAMD64_OpGreaterEqualMaskedFloat64x4(v)
-       case OpGreaterEqualMaskedFloat64x8:
-               return rewriteValueAMD64_OpGreaterEqualMaskedFloat64x8(v)
-       case OpGreaterEqualMaskedInt16x16:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt16x16(v)
-       case OpGreaterEqualMaskedInt16x32:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt16x32(v)
-       case OpGreaterEqualMaskedInt16x8:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt16x8(v)
-       case OpGreaterEqualMaskedInt32x16:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt32x16(v)
-       case OpGreaterEqualMaskedInt32x4:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt32x4(v)
-       case OpGreaterEqualMaskedInt32x8:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt32x8(v)
-       case OpGreaterEqualMaskedInt64x2:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt64x2(v)
-       case OpGreaterEqualMaskedInt64x4:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt64x4(v)
-       case OpGreaterEqualMaskedInt64x8:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt64x8(v)
-       case OpGreaterEqualMaskedInt8x16:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt8x16(v)
-       case OpGreaterEqualMaskedInt8x32:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt8x32(v)
-       case OpGreaterEqualMaskedInt8x64:
-               return rewriteValueAMD64_OpGreaterEqualMaskedInt8x64(v)
-       case OpGreaterEqualMaskedUint16x16:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint16x16(v)
-       case OpGreaterEqualMaskedUint16x32:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint16x32(v)
-       case OpGreaterEqualMaskedUint16x8:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint16x8(v)
-       case OpGreaterEqualMaskedUint32x16:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint32x16(v)
-       case OpGreaterEqualMaskedUint32x4:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint32x4(v)
-       case OpGreaterEqualMaskedUint32x8:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint32x8(v)
-       case OpGreaterEqualMaskedUint64x2:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint64x2(v)
-       case OpGreaterEqualMaskedUint64x4:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint64x4(v)
-       case OpGreaterEqualMaskedUint64x8:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint64x8(v)
-       case OpGreaterEqualMaskedUint8x16:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint8x16(v)
-       case OpGreaterEqualMaskedUint8x32:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint8x32(v)
-       case OpGreaterEqualMaskedUint8x64:
-               return rewriteValueAMD64_OpGreaterEqualMaskedUint8x64(v)
        case OpGreaterEqualUint16x32:
                return rewriteValueAMD64_OpGreaterEqualUint16x32(v)
        case OpGreaterEqualUint32x16:
@@ -2430,66 +1926,6 @@ func rewriteValueAMD64(v *Value) bool {
                return true
        case OpGreaterInt8x64:
                return rewriteValueAMD64_OpGreaterInt8x64(v)
-       case OpGreaterMaskedFloat32x16:
-               return rewriteValueAMD64_OpGreaterMaskedFloat32x16(v)
-       case OpGreaterMaskedFloat32x4:
-               return rewriteValueAMD64_OpGreaterMaskedFloat32x4(v)
-       case OpGreaterMaskedFloat32x8:
-               return rewriteValueAMD64_OpGreaterMaskedFloat32x8(v)
-       case OpGreaterMaskedFloat64x2:
-               return rewriteValueAMD64_OpGreaterMaskedFloat64x2(v)
-       case OpGreaterMaskedFloat64x4:
-               return rewriteValueAMD64_OpGreaterMaskedFloat64x4(v)
-       case OpGreaterMaskedFloat64x8:
-               return rewriteValueAMD64_OpGreaterMaskedFloat64x8(v)
-       case OpGreaterMaskedInt16x16:
-               return rewriteValueAMD64_OpGreaterMaskedInt16x16(v)
-       case OpGreaterMaskedInt16x32:
-               return rewriteValueAMD64_OpGreaterMaskedInt16x32(v)
-       case OpGreaterMaskedInt16x8:
-               return rewriteValueAMD64_OpGreaterMaskedInt16x8(v)
-       case OpGreaterMaskedInt32x16:
-               return rewriteValueAMD64_OpGreaterMaskedInt32x16(v)
-       case OpGreaterMaskedInt32x4:
-               return rewriteValueAMD64_OpGreaterMaskedInt32x4(v)
-       case OpGreaterMaskedInt32x8:
-               return rewriteValueAMD64_OpGreaterMaskedInt32x8(v)
-       case OpGreaterMaskedInt64x2:
-               return rewriteValueAMD64_OpGreaterMaskedInt64x2(v)
-       case OpGreaterMaskedInt64x4:
-               return rewriteValueAMD64_OpGreaterMaskedInt64x4(v)
-       case OpGreaterMaskedInt64x8:
-               return rewriteValueAMD64_OpGreaterMaskedInt64x8(v)
-       case OpGreaterMaskedInt8x16:
-               return rewriteValueAMD64_OpGreaterMaskedInt8x16(v)
-       case OpGreaterMaskedInt8x32:
-               return rewriteValueAMD64_OpGreaterMaskedInt8x32(v)
-       case OpGreaterMaskedInt8x64:
-               return rewriteValueAMD64_OpGreaterMaskedInt8x64(v)
-       case OpGreaterMaskedUint16x16:
-               return rewriteValueAMD64_OpGreaterMaskedUint16x16(v)
-       case OpGreaterMaskedUint16x32:
-               return rewriteValueAMD64_OpGreaterMaskedUint16x32(v)
-       case OpGreaterMaskedUint16x8:
-               return rewriteValueAMD64_OpGreaterMaskedUint16x8(v)
-       case OpGreaterMaskedUint32x16:
-               return rewriteValueAMD64_OpGreaterMaskedUint32x16(v)
-       case OpGreaterMaskedUint32x4:
-               return rewriteValueAMD64_OpGreaterMaskedUint32x4(v)
-       case OpGreaterMaskedUint32x8:
-               return rewriteValueAMD64_OpGreaterMaskedUint32x8(v)
-       case OpGreaterMaskedUint64x2:
-               return rewriteValueAMD64_OpGreaterMaskedUint64x2(v)
-       case OpGreaterMaskedUint64x4:
-               return rewriteValueAMD64_OpGreaterMaskedUint64x4(v)
-       case OpGreaterMaskedUint64x8:
-               return rewriteValueAMD64_OpGreaterMaskedUint64x8(v)
-       case OpGreaterMaskedUint8x16:
-               return rewriteValueAMD64_OpGreaterMaskedUint8x16(v)
-       case OpGreaterMaskedUint8x32:
-               return rewriteValueAMD64_OpGreaterMaskedUint8x32(v)
-       case OpGreaterMaskedUint8x64:
-               return rewriteValueAMD64_OpGreaterMaskedUint8x64(v)
        case OpGreaterUint16x32:
                return rewriteValueAMD64_OpGreaterUint16x32(v)
        case OpGreaterUint32x16:
@@ -2529,18 +1965,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpIsNanFloat64x4(v)
        case OpIsNanFloat64x8:
                return rewriteValueAMD64_OpIsNanFloat64x8(v)
-       case OpIsNanMaskedFloat32x16:
-               return rewriteValueAMD64_OpIsNanMaskedFloat32x16(v)
-       case OpIsNanMaskedFloat32x4:
-               return rewriteValueAMD64_OpIsNanMaskedFloat32x4(v)
-       case OpIsNanMaskedFloat32x8:
-               return rewriteValueAMD64_OpIsNanMaskedFloat32x8(v)
-       case OpIsNanMaskedFloat64x2:
-               return rewriteValueAMD64_OpIsNanMaskedFloat64x2(v)
-       case OpIsNanMaskedFloat64x4:
-               return rewriteValueAMD64_OpIsNanMaskedFloat64x4(v)
-       case OpIsNanMaskedFloat64x8:
-               return rewriteValueAMD64_OpIsNanMaskedFloat64x8(v)
        case OpIsNonNil:
                return rewriteValueAMD64_OpIsNonNil(v)
        case OpIsSliceInBounds:
@@ -2605,66 +2029,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpLessEqualInt64x8(v)
        case OpLessEqualInt8x64:
                return rewriteValueAMD64_OpLessEqualInt8x64(v)
-       case OpLessEqualMaskedFloat32x16:
-               return rewriteValueAMD64_OpLessEqualMaskedFloat32x16(v)
-       case OpLessEqualMaskedFloat32x4:
-               return rewriteValueAMD64_OpLessEqualMaskedFloat32x4(v)
-       case OpLessEqualMaskedFloat32x8:
-               return rewriteValueAMD64_OpLessEqualMaskedFloat32x8(v)
-       case OpLessEqualMaskedFloat64x2:
-               return rewriteValueAMD64_OpLessEqualMaskedFloat64x2(v)
-       case OpLessEqualMaskedFloat64x4:
-               return rewriteValueAMD64_OpLessEqualMaskedFloat64x4(v)
-       case OpLessEqualMaskedFloat64x8:
-               return rewriteValueAMD64_OpLessEqualMaskedFloat64x8(v)
-       case OpLessEqualMaskedInt16x16:
-               return rewriteValueAMD64_OpLessEqualMaskedInt16x16(v)
-       case OpLessEqualMaskedInt16x32:
-               return rewriteValueAMD64_OpLessEqualMaskedInt16x32(v)
-       case OpLessEqualMaskedInt16x8:
-               return rewriteValueAMD64_OpLessEqualMaskedInt16x8(v)
-       case OpLessEqualMaskedInt32x16:
-               return rewriteValueAMD64_OpLessEqualMaskedInt32x16(v)
-       case OpLessEqualMaskedInt32x4:
-               return rewriteValueAMD64_OpLessEqualMaskedInt32x4(v)
-       case OpLessEqualMaskedInt32x8:
-               return rewriteValueAMD64_OpLessEqualMaskedInt32x8(v)
-       case OpLessEqualMaskedInt64x2:
-               return rewriteValueAMD64_OpLessEqualMaskedInt64x2(v)
-       case OpLessEqualMaskedInt64x4:
-               return rewriteValueAMD64_OpLessEqualMaskedInt64x4(v)
-       case OpLessEqualMaskedInt64x8:
-               return rewriteValueAMD64_OpLessEqualMaskedInt64x8(v)
-       case OpLessEqualMaskedInt8x16:
-               return rewriteValueAMD64_OpLessEqualMaskedInt8x16(v)
-       case OpLessEqualMaskedInt8x32:
-               return rewriteValueAMD64_OpLessEqualMaskedInt8x32(v)
-       case OpLessEqualMaskedInt8x64:
-               return rewriteValueAMD64_OpLessEqualMaskedInt8x64(v)
-       case OpLessEqualMaskedUint16x16:
-               return rewriteValueAMD64_OpLessEqualMaskedUint16x16(v)
-       case OpLessEqualMaskedUint16x32:
-               return rewriteValueAMD64_OpLessEqualMaskedUint16x32(v)
-       case OpLessEqualMaskedUint16x8:
-               return rewriteValueAMD64_OpLessEqualMaskedUint16x8(v)
-       case OpLessEqualMaskedUint32x16:
-               return rewriteValueAMD64_OpLessEqualMaskedUint32x16(v)
-       case OpLessEqualMaskedUint32x4:
-               return rewriteValueAMD64_OpLessEqualMaskedUint32x4(v)
-       case OpLessEqualMaskedUint32x8:
-               return rewriteValueAMD64_OpLessEqualMaskedUint32x8(v)
-       case OpLessEqualMaskedUint64x2:
-               return rewriteValueAMD64_OpLessEqualMaskedUint64x2(v)
-       case OpLessEqualMaskedUint64x4:
-               return rewriteValueAMD64_OpLessEqualMaskedUint64x4(v)
-       case OpLessEqualMaskedUint64x8:
-               return rewriteValueAMD64_OpLessEqualMaskedUint64x8(v)
-       case OpLessEqualMaskedUint8x16:
-               return rewriteValueAMD64_OpLessEqualMaskedUint8x16(v)
-       case OpLessEqualMaskedUint8x32:
-               return rewriteValueAMD64_OpLessEqualMaskedUint8x32(v)
-       case OpLessEqualMaskedUint8x64:
-               return rewriteValueAMD64_OpLessEqualMaskedUint8x64(v)
        case OpLessEqualUint16x32:
                return rewriteValueAMD64_OpLessEqualUint16x32(v)
        case OpLessEqualUint32x16:
@@ -2693,66 +2057,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpLessInt64x8(v)
        case OpLessInt8x64:
                return rewriteValueAMD64_OpLessInt8x64(v)
-       case OpLessMaskedFloat32x16:
-               return rewriteValueAMD64_OpLessMaskedFloat32x16(v)
-       case OpLessMaskedFloat32x4:
-               return rewriteValueAMD64_OpLessMaskedFloat32x4(v)
-       case OpLessMaskedFloat32x8:
-               return rewriteValueAMD64_OpLessMaskedFloat32x8(v)
-       case OpLessMaskedFloat64x2:
-               return rewriteValueAMD64_OpLessMaskedFloat64x2(v)
-       case OpLessMaskedFloat64x4:
-               return rewriteValueAMD64_OpLessMaskedFloat64x4(v)
-       case OpLessMaskedFloat64x8:
-               return rewriteValueAMD64_OpLessMaskedFloat64x8(v)
-       case OpLessMaskedInt16x16:
-               return rewriteValueAMD64_OpLessMaskedInt16x16(v)
-       case OpLessMaskedInt16x32:
-               return rewriteValueAMD64_OpLessMaskedInt16x32(v)
-       case OpLessMaskedInt16x8:
-               return rewriteValueAMD64_OpLessMaskedInt16x8(v)
-       case OpLessMaskedInt32x16:
-               return rewriteValueAMD64_OpLessMaskedInt32x16(v)
-       case OpLessMaskedInt32x4:
-               return rewriteValueAMD64_OpLessMaskedInt32x4(v)
-       case OpLessMaskedInt32x8:
-               return rewriteValueAMD64_OpLessMaskedInt32x8(v)
-       case OpLessMaskedInt64x2:
-               return rewriteValueAMD64_OpLessMaskedInt64x2(v)
-       case OpLessMaskedInt64x4:
-               return rewriteValueAMD64_OpLessMaskedInt64x4(v)
-       case OpLessMaskedInt64x8:
-               return rewriteValueAMD64_OpLessMaskedInt64x8(v)
-       case OpLessMaskedInt8x16:
-               return rewriteValueAMD64_OpLessMaskedInt8x16(v)
-       case OpLessMaskedInt8x32:
-               return rewriteValueAMD64_OpLessMaskedInt8x32(v)
-       case OpLessMaskedInt8x64:
-               return rewriteValueAMD64_OpLessMaskedInt8x64(v)
-       case OpLessMaskedUint16x16:
-               return rewriteValueAMD64_OpLessMaskedUint16x16(v)
-       case OpLessMaskedUint16x32:
-               return rewriteValueAMD64_OpLessMaskedUint16x32(v)
-       case OpLessMaskedUint16x8:
-               return rewriteValueAMD64_OpLessMaskedUint16x8(v)
-       case OpLessMaskedUint32x16:
-               return rewriteValueAMD64_OpLessMaskedUint32x16(v)
-       case OpLessMaskedUint32x4:
-               return rewriteValueAMD64_OpLessMaskedUint32x4(v)
-       case OpLessMaskedUint32x8:
-               return rewriteValueAMD64_OpLessMaskedUint32x8(v)
-       case OpLessMaskedUint64x2:
-               return rewriteValueAMD64_OpLessMaskedUint64x2(v)
-       case OpLessMaskedUint64x4:
-               return rewriteValueAMD64_OpLessMaskedUint64x4(v)
-       case OpLessMaskedUint64x8:
-               return rewriteValueAMD64_OpLessMaskedUint64x8(v)
-       case OpLessMaskedUint8x16:
-               return rewriteValueAMD64_OpLessMaskedUint8x16(v)
-       case OpLessMaskedUint8x32:
-               return rewriteValueAMD64_OpLessMaskedUint8x32(v)
-       case OpLessMaskedUint8x64:
-               return rewriteValueAMD64_OpLessMaskedUint8x64(v)
        case OpLessUint16x32:
                return rewriteValueAMD64_OpLessUint16x32(v)
        case OpLessUint32x16:
@@ -2887,66 +2191,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpMaxInt8x64:
                v.Op = OpAMD64VPMAXSB512
                return true
-       case OpMaxMaskedFloat32x16:
-               return rewriteValueAMD64_OpMaxMaskedFloat32x16(v)
-       case OpMaxMaskedFloat32x4:
-               return rewriteValueAMD64_OpMaxMaskedFloat32x4(v)
-       case OpMaxMaskedFloat32x8:
-               return rewriteValueAMD64_OpMaxMaskedFloat32x8(v)
-       case OpMaxMaskedFloat64x2:
-               return rewriteValueAMD64_OpMaxMaskedFloat64x2(v)
-       case OpMaxMaskedFloat64x4:
-               return rewriteValueAMD64_OpMaxMaskedFloat64x4(v)
-       case OpMaxMaskedFloat64x8:
-               return rewriteValueAMD64_OpMaxMaskedFloat64x8(v)
-       case OpMaxMaskedInt16x16:
-               return rewriteValueAMD64_OpMaxMaskedInt16x16(v)
-       case OpMaxMaskedInt16x32:
-               return rewriteValueAMD64_OpMaxMaskedInt16x32(v)
-       case OpMaxMaskedInt16x8:
-               return rewriteValueAMD64_OpMaxMaskedInt16x8(v)
-       case OpMaxMaskedInt32x16:
-               return rewriteValueAMD64_OpMaxMaskedInt32x16(v)
-       case OpMaxMaskedInt32x4:
-               return rewriteValueAMD64_OpMaxMaskedInt32x4(v)
-       case OpMaxMaskedInt32x8:
-               return rewriteValueAMD64_OpMaxMaskedInt32x8(v)
-       case OpMaxMaskedInt64x2:
-               return rewriteValueAMD64_OpMaxMaskedInt64x2(v)
-       case OpMaxMaskedInt64x4:
-               return rewriteValueAMD64_OpMaxMaskedInt64x4(v)
-       case OpMaxMaskedInt64x8:
-               return rewriteValueAMD64_OpMaxMaskedInt64x8(v)
-       case OpMaxMaskedInt8x16:
-               return rewriteValueAMD64_OpMaxMaskedInt8x16(v)
-       case OpMaxMaskedInt8x32:
-               return rewriteValueAMD64_OpMaxMaskedInt8x32(v)
-       case OpMaxMaskedInt8x64:
-               return rewriteValueAMD64_OpMaxMaskedInt8x64(v)
-       case OpMaxMaskedUint16x16:
-               return rewriteValueAMD64_OpMaxMaskedUint16x16(v)
-       case OpMaxMaskedUint16x32:
-               return rewriteValueAMD64_OpMaxMaskedUint16x32(v)
-       case OpMaxMaskedUint16x8:
-               return rewriteValueAMD64_OpMaxMaskedUint16x8(v)
-       case OpMaxMaskedUint32x16:
-               return rewriteValueAMD64_OpMaxMaskedUint32x16(v)
-       case OpMaxMaskedUint32x4:
-               return rewriteValueAMD64_OpMaxMaskedUint32x4(v)
-       case OpMaxMaskedUint32x8:
-               return rewriteValueAMD64_OpMaxMaskedUint32x8(v)
-       case OpMaxMaskedUint64x2:
-               return rewriteValueAMD64_OpMaxMaskedUint64x2(v)
-       case OpMaxMaskedUint64x4:
-               return rewriteValueAMD64_OpMaxMaskedUint64x4(v)
-       case OpMaxMaskedUint64x8:
-               return rewriteValueAMD64_OpMaxMaskedUint64x8(v)
-       case OpMaxMaskedUint8x16:
-               return rewriteValueAMD64_OpMaxMaskedUint8x16(v)
-       case OpMaxMaskedUint8x32:
-               return rewriteValueAMD64_OpMaxMaskedUint8x32(v)
-       case OpMaxMaskedUint8x64:
-               return rewriteValueAMD64_OpMaxMaskedUint8x64(v)
        case OpMaxUint16x16:
                v.Op = OpAMD64VPMAXUW256
                return true
@@ -3041,66 +2285,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpMinInt8x64:
                v.Op = OpAMD64VPMINSB512
                return true
-       case OpMinMaskedFloat32x16:
-               return rewriteValueAMD64_OpMinMaskedFloat32x16(v)
-       case OpMinMaskedFloat32x4:
-               return rewriteValueAMD64_OpMinMaskedFloat32x4(v)
-       case OpMinMaskedFloat32x8:
-               return rewriteValueAMD64_OpMinMaskedFloat32x8(v)
-       case OpMinMaskedFloat64x2:
-               return rewriteValueAMD64_OpMinMaskedFloat64x2(v)
-       case OpMinMaskedFloat64x4:
-               return rewriteValueAMD64_OpMinMaskedFloat64x4(v)
-       case OpMinMaskedFloat64x8:
-               return rewriteValueAMD64_OpMinMaskedFloat64x8(v)
-       case OpMinMaskedInt16x16:
-               return rewriteValueAMD64_OpMinMaskedInt16x16(v)
-       case OpMinMaskedInt16x32:
-               return rewriteValueAMD64_OpMinMaskedInt16x32(v)
-       case OpMinMaskedInt16x8:
-               return rewriteValueAMD64_OpMinMaskedInt16x8(v)
-       case OpMinMaskedInt32x16:
-               return rewriteValueAMD64_OpMinMaskedInt32x16(v)
-       case OpMinMaskedInt32x4:
-               return rewriteValueAMD64_OpMinMaskedInt32x4(v)
-       case OpMinMaskedInt32x8:
-               return rewriteValueAMD64_OpMinMaskedInt32x8(v)
-       case OpMinMaskedInt64x2:
-               return rewriteValueAMD64_OpMinMaskedInt64x2(v)
-       case OpMinMaskedInt64x4:
-               return rewriteValueAMD64_OpMinMaskedInt64x4(v)
-       case OpMinMaskedInt64x8:
-               return rewriteValueAMD64_OpMinMaskedInt64x8(v)
-       case OpMinMaskedInt8x16:
-               return rewriteValueAMD64_OpMinMaskedInt8x16(v)
-       case OpMinMaskedInt8x32:
-               return rewriteValueAMD64_OpMinMaskedInt8x32(v)
-       case OpMinMaskedInt8x64:
-               return rewriteValueAMD64_OpMinMaskedInt8x64(v)
-       case OpMinMaskedUint16x16:
-               return rewriteValueAMD64_OpMinMaskedUint16x16(v)
-       case OpMinMaskedUint16x32:
-               return rewriteValueAMD64_OpMinMaskedUint16x32(v)
-       case OpMinMaskedUint16x8:
-               return rewriteValueAMD64_OpMinMaskedUint16x8(v)
-       case OpMinMaskedUint32x16:
-               return rewriteValueAMD64_OpMinMaskedUint32x16(v)
-       case OpMinMaskedUint32x4:
-               return rewriteValueAMD64_OpMinMaskedUint32x4(v)
-       case OpMinMaskedUint32x8:
-               return rewriteValueAMD64_OpMinMaskedUint32x8(v)
-       case OpMinMaskedUint64x2:
-               return rewriteValueAMD64_OpMinMaskedUint64x2(v)
-       case OpMinMaskedUint64x4:
-               return rewriteValueAMD64_OpMinMaskedUint64x4(v)
-       case OpMinMaskedUint64x8:
-               return rewriteValueAMD64_OpMinMaskedUint64x8(v)
-       case OpMinMaskedUint8x16:
-               return rewriteValueAMD64_OpMinMaskedUint8x16(v)
-       case OpMinMaskedUint8x32:
-               return rewriteValueAMD64_OpMinMaskedUint8x32(v)
-       case OpMinMaskedUint8x64:
-               return rewriteValueAMD64_OpMinMaskedUint8x64(v)
        case OpMinUint16x16:
                v.Op = OpAMD64VPMINUW256
                return true
@@ -3194,18 +2378,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpMulAddFloat64x8:
                v.Op = OpAMD64VFMADD213PD512
                return true
-       case OpMulAddMaskedFloat32x16:
-               return rewriteValueAMD64_OpMulAddMaskedFloat32x16(v)
-       case OpMulAddMaskedFloat32x4:
-               return rewriteValueAMD64_OpMulAddMaskedFloat32x4(v)
-       case OpMulAddMaskedFloat32x8:
-               return rewriteValueAMD64_OpMulAddMaskedFloat32x8(v)
-       case OpMulAddMaskedFloat64x2:
-               return rewriteValueAMD64_OpMulAddMaskedFloat64x2(v)
-       case OpMulAddMaskedFloat64x4:
-               return rewriteValueAMD64_OpMulAddMaskedFloat64x4(v)
-       case OpMulAddMaskedFloat64x8:
-               return rewriteValueAMD64_OpMulAddMaskedFloat64x8(v)
        case OpMulAddSubFloat32x16:
                v.Op = OpAMD64VFMADDSUB213PS512
                return true
@@ -3224,18 +2396,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpMulAddSubFloat64x8:
                v.Op = OpAMD64VFMADDSUB213PD512
                return true
-       case OpMulAddSubMaskedFloat32x16:
-               return rewriteValueAMD64_OpMulAddSubMaskedFloat32x16(v)
-       case OpMulAddSubMaskedFloat32x4:
-               return rewriteValueAMD64_OpMulAddSubMaskedFloat32x4(v)
-       case OpMulAddSubMaskedFloat32x8:
-               return rewriteValueAMD64_OpMulAddSubMaskedFloat32x8(v)
-       case OpMulAddSubMaskedFloat64x2:
-               return rewriteValueAMD64_OpMulAddSubMaskedFloat64x2(v)
-       case OpMulAddSubMaskedFloat64x4:
-               return rewriteValueAMD64_OpMulAddSubMaskedFloat64x4(v)
-       case OpMulAddSubMaskedFloat64x8:
-               return rewriteValueAMD64_OpMulAddSubMaskedFloat64x8(v)
        case OpMulEvenWidenInt32x4:
                v.Op = OpAMD64VPMULDQ128
                return true
@@ -3275,18 +2435,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpMulHighInt16x8:
                v.Op = OpAMD64VPMULHW128
                return true
-       case OpMulHighMaskedInt16x16:
-               return rewriteValueAMD64_OpMulHighMaskedInt16x16(v)
-       case OpMulHighMaskedInt16x32:
-               return rewriteValueAMD64_OpMulHighMaskedInt16x32(v)
-       case OpMulHighMaskedInt16x8:
-               return rewriteValueAMD64_OpMulHighMaskedInt16x8(v)
-       case OpMulHighMaskedUint16x16:
-               return rewriteValueAMD64_OpMulHighMaskedUint16x16(v)
-       case OpMulHighMaskedUint16x32:
-               return rewriteValueAMD64_OpMulHighMaskedUint16x32(v)
-       case OpMulHighMaskedUint16x8:
-               return rewriteValueAMD64_OpMulHighMaskedUint16x8(v)
        case OpMulHighUint16x16:
                v.Op = OpAMD64VPMULHUW256
                return true
@@ -3323,54 +2471,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpMulInt64x8:
                v.Op = OpAMD64VPMULLQ512
                return true
-       case OpMulMaskedFloat32x16:
-               return rewriteValueAMD64_OpMulMaskedFloat32x16(v)
-       case OpMulMaskedFloat32x4:
-               return rewriteValueAMD64_OpMulMaskedFloat32x4(v)
-       case OpMulMaskedFloat32x8:
-               return rewriteValueAMD64_OpMulMaskedFloat32x8(v)
-       case OpMulMaskedFloat64x2:
-               return rewriteValueAMD64_OpMulMaskedFloat64x2(v)
-       case OpMulMaskedFloat64x4:
-               return rewriteValueAMD64_OpMulMaskedFloat64x4(v)
-       case OpMulMaskedFloat64x8:
-               return rewriteValueAMD64_OpMulMaskedFloat64x8(v)
-       case OpMulMaskedInt16x16:
-               return rewriteValueAMD64_OpMulMaskedInt16x16(v)
-       case OpMulMaskedInt16x32:
-               return rewriteValueAMD64_OpMulMaskedInt16x32(v)
-       case OpMulMaskedInt16x8:
-               return rewriteValueAMD64_OpMulMaskedInt16x8(v)
-       case OpMulMaskedInt32x16:
-               return rewriteValueAMD64_OpMulMaskedInt32x16(v)
-       case OpMulMaskedInt32x4:
-               return rewriteValueAMD64_OpMulMaskedInt32x4(v)
-       case OpMulMaskedInt32x8:
-               return rewriteValueAMD64_OpMulMaskedInt32x8(v)
-       case OpMulMaskedInt64x2:
-               return rewriteValueAMD64_OpMulMaskedInt64x2(v)
-       case OpMulMaskedInt64x4:
-               return rewriteValueAMD64_OpMulMaskedInt64x4(v)
-       case OpMulMaskedInt64x8:
-               return rewriteValueAMD64_OpMulMaskedInt64x8(v)
-       case OpMulMaskedUint16x16:
-               return rewriteValueAMD64_OpMulMaskedUint16x16(v)
-       case OpMulMaskedUint16x32:
-               return rewriteValueAMD64_OpMulMaskedUint16x32(v)
-       case OpMulMaskedUint16x8:
-               return rewriteValueAMD64_OpMulMaskedUint16x8(v)
-       case OpMulMaskedUint32x16:
-               return rewriteValueAMD64_OpMulMaskedUint32x16(v)
-       case OpMulMaskedUint32x4:
-               return rewriteValueAMD64_OpMulMaskedUint32x4(v)
-       case OpMulMaskedUint32x8:
-               return rewriteValueAMD64_OpMulMaskedUint32x8(v)
-       case OpMulMaskedUint64x2:
-               return rewriteValueAMD64_OpMulMaskedUint64x2(v)
-       case OpMulMaskedUint64x4:
-               return rewriteValueAMD64_OpMulMaskedUint64x4(v)
-       case OpMulMaskedUint64x8:
-               return rewriteValueAMD64_OpMulMaskedUint64x8(v)
        case OpMulSubAddFloat32x16:
                v.Op = OpAMD64VFMSUBADD213PS512
                return true
@@ -3389,18 +2489,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpMulSubAddFloat64x8:
                v.Op = OpAMD64VFMSUBADD213PD512
                return true
-       case OpMulSubAddMaskedFloat32x16:
-               return rewriteValueAMD64_OpMulSubAddMaskedFloat32x16(v)
-       case OpMulSubAddMaskedFloat32x4:
-               return rewriteValueAMD64_OpMulSubAddMaskedFloat32x4(v)
-       case OpMulSubAddMaskedFloat32x8:
-               return rewriteValueAMD64_OpMulSubAddMaskedFloat32x8(v)
-       case OpMulSubAddMaskedFloat64x2:
-               return rewriteValueAMD64_OpMulSubAddMaskedFloat64x2(v)
-       case OpMulSubAddMaskedFloat64x4:
-               return rewriteValueAMD64_OpMulSubAddMaskedFloat64x4(v)
-       case OpMulSubAddMaskedFloat64x8:
-               return rewriteValueAMD64_OpMulSubAddMaskedFloat64x8(v)
        case OpMulUint16x16:
                v.Op = OpAMD64VPMULLW256
                return true
@@ -3485,66 +2573,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpNotEqualInt64x8(v)
        case OpNotEqualInt8x64:
                return rewriteValueAMD64_OpNotEqualInt8x64(v)
-       case OpNotEqualMaskedFloat32x16:
-               return rewriteValueAMD64_OpNotEqualMaskedFloat32x16(v)
-       case OpNotEqualMaskedFloat32x4:
-               return rewriteValueAMD64_OpNotEqualMaskedFloat32x4(v)
-       case OpNotEqualMaskedFloat32x8:
-               return rewriteValueAMD64_OpNotEqualMaskedFloat32x8(v)
-       case OpNotEqualMaskedFloat64x2:
-               return rewriteValueAMD64_OpNotEqualMaskedFloat64x2(v)
-       case OpNotEqualMaskedFloat64x4:
-               return rewriteValueAMD64_OpNotEqualMaskedFloat64x4(v)
-       case OpNotEqualMaskedFloat64x8:
-               return rewriteValueAMD64_OpNotEqualMaskedFloat64x8(v)
-       case OpNotEqualMaskedInt16x16:
-               return rewriteValueAMD64_OpNotEqualMaskedInt16x16(v)
-       case OpNotEqualMaskedInt16x32:
-               return rewriteValueAMD64_OpNotEqualMaskedInt16x32(v)
-       case OpNotEqualMaskedInt16x8:
-               return rewriteValueAMD64_OpNotEqualMaskedInt16x8(v)
-       case OpNotEqualMaskedInt32x16:
-               return rewriteValueAMD64_OpNotEqualMaskedInt32x16(v)
-       case OpNotEqualMaskedInt32x4:
-               return rewriteValueAMD64_OpNotEqualMaskedInt32x4(v)
-       case OpNotEqualMaskedInt32x8:
-               return rewriteValueAMD64_OpNotEqualMaskedInt32x8(v)
-       case OpNotEqualMaskedInt64x2:
-               return rewriteValueAMD64_OpNotEqualMaskedInt64x2(v)
-       case OpNotEqualMaskedInt64x4:
-               return rewriteValueAMD64_OpNotEqualMaskedInt64x4(v)
-       case OpNotEqualMaskedInt64x8:
-               return rewriteValueAMD64_OpNotEqualMaskedInt64x8(v)
-       case OpNotEqualMaskedInt8x16:
-               return rewriteValueAMD64_OpNotEqualMaskedInt8x16(v)
-       case OpNotEqualMaskedInt8x32:
-               return rewriteValueAMD64_OpNotEqualMaskedInt8x32(v)
-       case OpNotEqualMaskedInt8x64:
-               return rewriteValueAMD64_OpNotEqualMaskedInt8x64(v)
-       case OpNotEqualMaskedUint16x16:
-               return rewriteValueAMD64_OpNotEqualMaskedUint16x16(v)
-       case OpNotEqualMaskedUint16x32:
-               return rewriteValueAMD64_OpNotEqualMaskedUint16x32(v)
-       case OpNotEqualMaskedUint16x8:
-               return rewriteValueAMD64_OpNotEqualMaskedUint16x8(v)
-       case OpNotEqualMaskedUint32x16:
-               return rewriteValueAMD64_OpNotEqualMaskedUint32x16(v)
-       case OpNotEqualMaskedUint32x4:
-               return rewriteValueAMD64_OpNotEqualMaskedUint32x4(v)
-       case OpNotEqualMaskedUint32x8:
-               return rewriteValueAMD64_OpNotEqualMaskedUint32x8(v)
-       case OpNotEqualMaskedUint64x2:
-               return rewriteValueAMD64_OpNotEqualMaskedUint64x2(v)
-       case OpNotEqualMaskedUint64x4:
-               return rewriteValueAMD64_OpNotEqualMaskedUint64x4(v)
-       case OpNotEqualMaskedUint64x8:
-               return rewriteValueAMD64_OpNotEqualMaskedUint64x8(v)
-       case OpNotEqualMaskedUint8x16:
-               return rewriteValueAMD64_OpNotEqualMaskedUint8x16(v)
-       case OpNotEqualMaskedUint8x32:
-               return rewriteValueAMD64_OpNotEqualMaskedUint8x32(v)
-       case OpNotEqualMaskedUint8x64:
-               return rewriteValueAMD64_OpNotEqualMaskedUint8x64(v)
        case OpNotEqualUint16x32:
                return rewriteValueAMD64_OpNotEqualUint16x32(v)
        case OpNotEqualUint32x16:
@@ -3591,54 +2619,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpOnesCountInt8x64:
                v.Op = OpAMD64VPOPCNTB512
                return true
-       case OpOnesCountMaskedInt16x16:
-               return rewriteValueAMD64_OpOnesCountMaskedInt16x16(v)
-       case OpOnesCountMaskedInt16x32:
-               return rewriteValueAMD64_OpOnesCountMaskedInt16x32(v)
-       case OpOnesCountMaskedInt16x8:
-               return rewriteValueAMD64_OpOnesCountMaskedInt16x8(v)
-       case OpOnesCountMaskedInt32x16:
-               return rewriteValueAMD64_OpOnesCountMaskedInt32x16(v)
-       case OpOnesCountMaskedInt32x4:
-               return rewriteValueAMD64_OpOnesCountMaskedInt32x4(v)
-       case OpOnesCountMaskedInt32x8:
-               return rewriteValueAMD64_OpOnesCountMaskedInt32x8(v)
-       case OpOnesCountMaskedInt64x2:
-               return rewriteValueAMD64_OpOnesCountMaskedInt64x2(v)
-       case OpOnesCountMaskedInt64x4:
-               return rewriteValueAMD64_OpOnesCountMaskedInt64x4(v)
-       case OpOnesCountMaskedInt64x8:
-               return rewriteValueAMD64_OpOnesCountMaskedInt64x8(v)
-       case OpOnesCountMaskedInt8x16:
-               return rewriteValueAMD64_OpOnesCountMaskedInt8x16(v)
-       case OpOnesCountMaskedInt8x32:
-               return rewriteValueAMD64_OpOnesCountMaskedInt8x32(v)
-       case OpOnesCountMaskedInt8x64:
-               return rewriteValueAMD64_OpOnesCountMaskedInt8x64(v)
-       case OpOnesCountMaskedUint16x16:
-               return rewriteValueAMD64_OpOnesCountMaskedUint16x16(v)
-       case OpOnesCountMaskedUint16x32:
-               return rewriteValueAMD64_OpOnesCountMaskedUint16x32(v)
-       case OpOnesCountMaskedUint16x8:
-               return rewriteValueAMD64_OpOnesCountMaskedUint16x8(v)
-       case OpOnesCountMaskedUint32x16:
-               return rewriteValueAMD64_OpOnesCountMaskedUint32x16(v)
-       case OpOnesCountMaskedUint32x4:
-               return rewriteValueAMD64_OpOnesCountMaskedUint32x4(v)
-       case OpOnesCountMaskedUint32x8:
-               return rewriteValueAMD64_OpOnesCountMaskedUint32x8(v)
-       case OpOnesCountMaskedUint64x2:
-               return rewriteValueAMD64_OpOnesCountMaskedUint64x2(v)
-       case OpOnesCountMaskedUint64x4:
-               return rewriteValueAMD64_OpOnesCountMaskedUint64x4(v)
-       case OpOnesCountMaskedUint64x8:
-               return rewriteValueAMD64_OpOnesCountMaskedUint64x8(v)
-       case OpOnesCountMaskedUint8x16:
-               return rewriteValueAMD64_OpOnesCountMaskedUint8x16(v)
-       case OpOnesCountMaskedUint8x32:
-               return rewriteValueAMD64_OpOnesCountMaskedUint8x32(v)
-       case OpOnesCountMaskedUint8x64:
-               return rewriteValueAMD64_OpOnesCountMaskedUint8x64(v)
        case OpOnesCountUint16x16:
                v.Op = OpAMD64VPOPCNTW256
                return true
@@ -3726,30 +2706,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpOrInt8x64:
                v.Op = OpAMD64VPORD512
                return true
-       case OpOrMaskedInt32x16:
-               return rewriteValueAMD64_OpOrMaskedInt32x16(v)
-       case OpOrMaskedInt32x4:
-               return rewriteValueAMD64_OpOrMaskedInt32x4(v)
-       case OpOrMaskedInt32x8:
-               return rewriteValueAMD64_OpOrMaskedInt32x8(v)
-       case OpOrMaskedInt64x2:
-               return rewriteValueAMD64_OpOrMaskedInt64x2(v)
-       case OpOrMaskedInt64x4:
-               return rewriteValueAMD64_OpOrMaskedInt64x4(v)
-       case OpOrMaskedInt64x8:
-               return rewriteValueAMD64_OpOrMaskedInt64x8(v)
-       case OpOrMaskedUint32x16:
-               return rewriteValueAMD64_OpOrMaskedUint32x16(v)
-       case OpOrMaskedUint32x4:
-               return rewriteValueAMD64_OpOrMaskedUint32x4(v)
-       case OpOrMaskedUint32x8:
-               return rewriteValueAMD64_OpOrMaskedUint32x8(v)
-       case OpOrMaskedUint64x2:
-               return rewriteValueAMD64_OpOrMaskedUint64x2(v)
-       case OpOrMaskedUint64x4:
-               return rewriteValueAMD64_OpOrMaskedUint64x4(v)
-       case OpOrMaskedUint64x8:
-               return rewriteValueAMD64_OpOrMaskedUint64x8(v)
        case OpOrUint16x16:
                v.Op = OpAMD64VPOR256
                return true
@@ -3843,66 +2799,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpPermute2Int8x64:
                v.Op = OpAMD64VPERMI2B512
                return true
-       case OpPermute2MaskedFloat32x16:
-               return rewriteValueAMD64_OpPermute2MaskedFloat32x16(v)
-       case OpPermute2MaskedFloat32x4:
-               return rewriteValueAMD64_OpPermute2MaskedFloat32x4(v)
-       case OpPermute2MaskedFloat32x8:
-               return rewriteValueAMD64_OpPermute2MaskedFloat32x8(v)
-       case OpPermute2MaskedFloat64x2:
-               return rewriteValueAMD64_OpPermute2MaskedFloat64x2(v)
-       case OpPermute2MaskedFloat64x4:
-               return rewriteValueAMD64_OpPermute2MaskedFloat64x4(v)
-       case OpPermute2MaskedFloat64x8:
-               return rewriteValueAMD64_OpPermute2MaskedFloat64x8(v)
-       case OpPermute2MaskedInt16x16:
-               return rewriteValueAMD64_OpPermute2MaskedInt16x16(v)
-       case OpPermute2MaskedInt16x32:
-               return rewriteValueAMD64_OpPermute2MaskedInt16x32(v)
-       case OpPermute2MaskedInt16x8:
-               return rewriteValueAMD64_OpPermute2MaskedInt16x8(v)
-       case OpPermute2MaskedInt32x16:
-               return rewriteValueAMD64_OpPermute2MaskedInt32x16(v)
-       case OpPermute2MaskedInt32x4:
-               return rewriteValueAMD64_OpPermute2MaskedInt32x4(v)
-       case OpPermute2MaskedInt32x8:
-               return rewriteValueAMD64_OpPermute2MaskedInt32x8(v)
-       case OpPermute2MaskedInt64x2:
-               return rewriteValueAMD64_OpPermute2MaskedInt64x2(v)
-       case OpPermute2MaskedInt64x4:
-               return rewriteValueAMD64_OpPermute2MaskedInt64x4(v)
-       case OpPermute2MaskedInt64x8:
-               return rewriteValueAMD64_OpPermute2MaskedInt64x8(v)
-       case OpPermute2MaskedInt8x16:
-               return rewriteValueAMD64_OpPermute2MaskedInt8x16(v)
-       case OpPermute2MaskedInt8x32:
-               return rewriteValueAMD64_OpPermute2MaskedInt8x32(v)
-       case OpPermute2MaskedInt8x64:
-               return rewriteValueAMD64_OpPermute2MaskedInt8x64(v)
-       case OpPermute2MaskedUint16x16:
-               return rewriteValueAMD64_OpPermute2MaskedUint16x16(v)
-       case OpPermute2MaskedUint16x32:
-               return rewriteValueAMD64_OpPermute2MaskedUint16x32(v)
-       case OpPermute2MaskedUint16x8:
-               return rewriteValueAMD64_OpPermute2MaskedUint16x8(v)
-       case OpPermute2MaskedUint32x16:
-               return rewriteValueAMD64_OpPermute2MaskedUint32x16(v)
-       case OpPermute2MaskedUint32x4:
-               return rewriteValueAMD64_OpPermute2MaskedUint32x4(v)
-       case OpPermute2MaskedUint32x8:
-               return rewriteValueAMD64_OpPermute2MaskedUint32x8(v)
-       case OpPermute2MaskedUint64x2:
-               return rewriteValueAMD64_OpPermute2MaskedUint64x2(v)
-       case OpPermute2MaskedUint64x4:
-               return rewriteValueAMD64_OpPermute2MaskedUint64x4(v)
-       case OpPermute2MaskedUint64x8:
-               return rewriteValueAMD64_OpPermute2MaskedUint64x8(v)
-       case OpPermute2MaskedUint8x16:
-               return rewriteValueAMD64_OpPermute2MaskedUint8x16(v)
-       case OpPermute2MaskedUint8x32:
-               return rewriteValueAMD64_OpPermute2MaskedUint8x32(v)
-       case OpPermute2MaskedUint8x64:
-               return rewriteValueAMD64_OpPermute2MaskedUint8x64(v)
        case OpPermute2Uint16x16:
                v.Op = OpAMD64VPERMI2W256
                return true
@@ -3981,54 +2877,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpPermuteInt8x64:
                v.Op = OpAMD64VPERMB512
                return true
-       case OpPermuteMaskedFloat32x16:
-               return rewriteValueAMD64_OpPermuteMaskedFloat32x16(v)
-       case OpPermuteMaskedFloat32x8:
-               return rewriteValueAMD64_OpPermuteMaskedFloat32x8(v)
-       case OpPermuteMaskedFloat64x4:
-               return rewriteValueAMD64_OpPermuteMaskedFloat64x4(v)
-       case OpPermuteMaskedFloat64x8:
-               return rewriteValueAMD64_OpPermuteMaskedFloat64x8(v)
-       case OpPermuteMaskedInt16x16:
-               return rewriteValueAMD64_OpPermuteMaskedInt16x16(v)
-       case OpPermuteMaskedInt16x32:
-               return rewriteValueAMD64_OpPermuteMaskedInt16x32(v)
-       case OpPermuteMaskedInt16x8:
-               return rewriteValueAMD64_OpPermuteMaskedInt16x8(v)
-       case OpPermuteMaskedInt32x16:
-               return rewriteValueAMD64_OpPermuteMaskedInt32x16(v)
-       case OpPermuteMaskedInt32x8:
-               return rewriteValueAMD64_OpPermuteMaskedInt32x8(v)
-       case OpPermuteMaskedInt64x4:
-               return rewriteValueAMD64_OpPermuteMaskedInt64x4(v)
-       case OpPermuteMaskedInt64x8:
-               return rewriteValueAMD64_OpPermuteMaskedInt64x8(v)
-       case OpPermuteMaskedInt8x16:
-               return rewriteValueAMD64_OpPermuteMaskedInt8x16(v)
-       case OpPermuteMaskedInt8x32:
-               return rewriteValueAMD64_OpPermuteMaskedInt8x32(v)
-       case OpPermuteMaskedInt8x64:
-               return rewriteValueAMD64_OpPermuteMaskedInt8x64(v)
-       case OpPermuteMaskedUint16x16:
-               return rewriteValueAMD64_OpPermuteMaskedUint16x16(v)
-       case OpPermuteMaskedUint16x32:
-               return rewriteValueAMD64_OpPermuteMaskedUint16x32(v)
-       case OpPermuteMaskedUint16x8:
-               return rewriteValueAMD64_OpPermuteMaskedUint16x8(v)
-       case OpPermuteMaskedUint32x16:
-               return rewriteValueAMD64_OpPermuteMaskedUint32x16(v)
-       case OpPermuteMaskedUint32x8:
-               return rewriteValueAMD64_OpPermuteMaskedUint32x8(v)
-       case OpPermuteMaskedUint64x4:
-               return rewriteValueAMD64_OpPermuteMaskedUint64x4(v)
-       case OpPermuteMaskedUint64x8:
-               return rewriteValueAMD64_OpPermuteMaskedUint64x8(v)
-       case OpPermuteMaskedUint8x16:
-               return rewriteValueAMD64_OpPermuteMaskedUint8x16(v)
-       case OpPermuteMaskedUint8x32:
-               return rewriteValueAMD64_OpPermuteMaskedUint8x32(v)
-       case OpPermuteMaskedUint8x64:
-               return rewriteValueAMD64_OpPermuteMaskedUint8x64(v)
        case OpPermuteUint16x16:
                v.Op = OpAMD64VPERMW256
                return true
@@ -4093,18 +2941,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpReciprocalFloat64x8:
                v.Op = OpAMD64VRCP14PD512
                return true
-       case OpReciprocalMaskedFloat32x16:
-               return rewriteValueAMD64_OpReciprocalMaskedFloat32x16(v)
-       case OpReciprocalMaskedFloat32x4:
-               return rewriteValueAMD64_OpReciprocalMaskedFloat32x4(v)
-       case OpReciprocalMaskedFloat32x8:
-               return rewriteValueAMD64_OpReciprocalMaskedFloat32x8(v)
-       case OpReciprocalMaskedFloat64x2:
-               return rewriteValueAMD64_OpReciprocalMaskedFloat64x2(v)
-       case OpReciprocalMaskedFloat64x4:
-               return rewriteValueAMD64_OpReciprocalMaskedFloat64x4(v)
-       case OpReciprocalMaskedFloat64x8:
-               return rewriteValueAMD64_OpReciprocalMaskedFloat64x8(v)
        case OpReciprocalSqrtFloat32x16:
                v.Op = OpAMD64VRSQRT14PS512
                return true
@@ -4123,18 +2959,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpReciprocalSqrtFloat64x8:
                v.Op = OpAMD64VRSQRT14PD512
                return true
-       case OpReciprocalSqrtMaskedFloat32x16:
-               return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x16(v)
-       case OpReciprocalSqrtMaskedFloat32x4:
-               return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x4(v)
-       case OpReciprocalSqrtMaskedFloat32x8:
-               return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x8(v)
-       case OpReciprocalSqrtMaskedFloat64x2:
-               return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x2(v)
-       case OpReciprocalSqrtMaskedFloat64x4:
-               return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x4(v)
-       case OpReciprocalSqrtMaskedFloat64x8:
-               return rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x8(v)
        case OpRotateAllLeftInt32x16:
                v.Op = OpAMD64VPROLD512
                return true
@@ -4153,30 +2977,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpRotateAllLeftInt64x8:
                v.Op = OpAMD64VPROLQ512
                return true
-       case OpRotateAllLeftMaskedInt32x16:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedInt32x16(v)
-       case OpRotateAllLeftMaskedInt32x4:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedInt32x4(v)
-       case OpRotateAllLeftMaskedInt32x8:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedInt32x8(v)
-       case OpRotateAllLeftMaskedInt64x2:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedInt64x2(v)
-       case OpRotateAllLeftMaskedInt64x4:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedInt64x4(v)
-       case OpRotateAllLeftMaskedInt64x8:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedInt64x8(v)
-       case OpRotateAllLeftMaskedUint32x16:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedUint32x16(v)
-       case OpRotateAllLeftMaskedUint32x4:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedUint32x4(v)
-       case OpRotateAllLeftMaskedUint32x8:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedUint32x8(v)
-       case OpRotateAllLeftMaskedUint64x2:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedUint64x2(v)
-       case OpRotateAllLeftMaskedUint64x4:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedUint64x4(v)
-       case OpRotateAllLeftMaskedUint64x8:
-               return rewriteValueAMD64_OpRotateAllLeftMaskedUint64x8(v)
        case OpRotateAllLeftUint32x16:
                v.Op = OpAMD64VPROLD512
                return true
@@ -4213,30 +3013,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpRotateAllRightInt64x8:
                v.Op = OpAMD64VPRORQ512
                return true
-       case OpRotateAllRightMaskedInt32x16:
-               return rewriteValueAMD64_OpRotateAllRightMaskedInt32x16(v)
-       case OpRotateAllRightMaskedInt32x4:
-               return rewriteValueAMD64_OpRotateAllRightMaskedInt32x4(v)
-       case OpRotateAllRightMaskedInt32x8:
-               return rewriteValueAMD64_OpRotateAllRightMaskedInt32x8(v)
-       case OpRotateAllRightMaskedInt64x2:
-               return rewriteValueAMD64_OpRotateAllRightMaskedInt64x2(v)
-       case OpRotateAllRightMaskedInt64x4:
-               return rewriteValueAMD64_OpRotateAllRightMaskedInt64x4(v)
-       case OpRotateAllRightMaskedInt64x8:
-               return rewriteValueAMD64_OpRotateAllRightMaskedInt64x8(v)
-       case OpRotateAllRightMaskedUint32x16:
-               return rewriteValueAMD64_OpRotateAllRightMaskedUint32x16(v)
-       case OpRotateAllRightMaskedUint32x4:
-               return rewriteValueAMD64_OpRotateAllRightMaskedUint32x4(v)
-       case OpRotateAllRightMaskedUint32x8:
-               return rewriteValueAMD64_OpRotateAllRightMaskedUint32x8(v)
-       case OpRotateAllRightMaskedUint64x2:
-               return rewriteValueAMD64_OpRotateAllRightMaskedUint64x2(v)
-       case OpRotateAllRightMaskedUint64x4:
-               return rewriteValueAMD64_OpRotateAllRightMaskedUint64x4(v)
-       case OpRotateAllRightMaskedUint64x8:
-               return rewriteValueAMD64_OpRotateAllRightMaskedUint64x8(v)
        case OpRotateAllRightUint32x16:
                v.Op = OpAMD64VPRORD512
                return true
@@ -4285,30 +3061,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpRotateLeftInt64x8:
                v.Op = OpAMD64VPROLVQ512
                return true
-       case OpRotateLeftMaskedInt32x16:
-               return rewriteValueAMD64_OpRotateLeftMaskedInt32x16(v)
-       case OpRotateLeftMaskedInt32x4:
-               return rewriteValueAMD64_OpRotateLeftMaskedInt32x4(v)
-       case OpRotateLeftMaskedInt32x8:
-               return rewriteValueAMD64_OpRotateLeftMaskedInt32x8(v)
-       case OpRotateLeftMaskedInt64x2:
-               return rewriteValueAMD64_OpRotateLeftMaskedInt64x2(v)
-       case OpRotateLeftMaskedInt64x4:
-               return rewriteValueAMD64_OpRotateLeftMaskedInt64x4(v)
-       case OpRotateLeftMaskedInt64x8:
-               return rewriteValueAMD64_OpRotateLeftMaskedInt64x8(v)
-       case OpRotateLeftMaskedUint32x16:
-               return rewriteValueAMD64_OpRotateLeftMaskedUint32x16(v)
-       case OpRotateLeftMaskedUint32x4:
-               return rewriteValueAMD64_OpRotateLeftMaskedUint32x4(v)
-       case OpRotateLeftMaskedUint32x8:
-               return rewriteValueAMD64_OpRotateLeftMaskedUint32x8(v)
-       case OpRotateLeftMaskedUint64x2:
-               return rewriteValueAMD64_OpRotateLeftMaskedUint64x2(v)
-       case OpRotateLeftMaskedUint64x4:
-               return rewriteValueAMD64_OpRotateLeftMaskedUint64x4(v)
-       case OpRotateLeftMaskedUint64x8:
-               return rewriteValueAMD64_OpRotateLeftMaskedUint64x8(v)
        case OpRotateLeftUint32x16:
                v.Op = OpAMD64VPROLVD512
                return true
@@ -4345,30 +3097,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpRotateRightInt64x8:
                v.Op = OpAMD64VPRORVQ512
                return true
-       case OpRotateRightMaskedInt32x16:
-               return rewriteValueAMD64_OpRotateRightMaskedInt32x16(v)
-       case OpRotateRightMaskedInt32x4:
-               return rewriteValueAMD64_OpRotateRightMaskedInt32x4(v)
-       case OpRotateRightMaskedInt32x8:
-               return rewriteValueAMD64_OpRotateRightMaskedInt32x8(v)
-       case OpRotateRightMaskedInt64x2:
-               return rewriteValueAMD64_OpRotateRightMaskedInt64x2(v)
-       case OpRotateRightMaskedInt64x4:
-               return rewriteValueAMD64_OpRotateRightMaskedInt64x4(v)
-       case OpRotateRightMaskedInt64x8:
-               return rewriteValueAMD64_OpRotateRightMaskedInt64x8(v)
-       case OpRotateRightMaskedUint32x16:
-               return rewriteValueAMD64_OpRotateRightMaskedUint32x16(v)
-       case OpRotateRightMaskedUint32x4:
-               return rewriteValueAMD64_OpRotateRightMaskedUint32x4(v)
-       case OpRotateRightMaskedUint32x8:
-               return rewriteValueAMD64_OpRotateRightMaskedUint32x8(v)
-       case OpRotateRightMaskedUint64x2:
-               return rewriteValueAMD64_OpRotateRightMaskedUint64x2(v)
-       case OpRotateRightMaskedUint64x4:
-               return rewriteValueAMD64_OpRotateRightMaskedUint64x4(v)
-       case OpRotateRightMaskedUint64x8:
-               return rewriteValueAMD64_OpRotateRightMaskedUint64x8(v)
        case OpRotateRightUint32x16:
                v.Op = OpAMD64VPRORVD512
                return true
@@ -4415,18 +3143,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpRoundToEvenScaledFloat64x4(v)
        case OpRoundToEvenScaledFloat64x8:
                return rewriteValueAMD64_OpRoundToEvenScaledFloat64x8(v)
-       case OpRoundToEvenScaledMaskedFloat32x16:
-               return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x16(v)
-       case OpRoundToEvenScaledMaskedFloat32x4:
-               return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x4(v)
-       case OpRoundToEvenScaledMaskedFloat32x8:
-               return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x8(v)
-       case OpRoundToEvenScaledMaskedFloat64x2:
-               return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x2(v)
-       case OpRoundToEvenScaledMaskedFloat64x4:
-               return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x4(v)
-       case OpRoundToEvenScaledMaskedFloat64x8:
-               return rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x8(v)
        case OpRoundToEvenScaledResidueFloat32x16:
                return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x16(v)
        case OpRoundToEvenScaledResidueFloat32x4:
@@ -4439,18 +3155,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x4(v)
        case OpRoundToEvenScaledResidueFloat64x8:
                return rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x8(v)
-       case OpRoundToEvenScaledResidueMaskedFloat32x16:
-               return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x16(v)
-       case OpRoundToEvenScaledResidueMaskedFloat32x4:
-               return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x4(v)
-       case OpRoundToEvenScaledResidueMaskedFloat32x8:
-               return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x8(v)
-       case OpRoundToEvenScaledResidueMaskedFloat64x2:
-               return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x2(v)
-       case OpRoundToEvenScaledResidueMaskedFloat64x4:
-               return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x4(v)
-       case OpRoundToEvenScaledResidueMaskedFloat64x8:
-               return rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x8(v)
        case OpRsh16Ux16:
                return rewriteValueAMD64_OpRsh16Ux16(v)
        case OpRsh16Ux32:
@@ -4533,18 +3237,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpScaleFloat64x8:
                v.Op = OpAMD64VSCALEFPD512
                return true
-       case OpScaleMaskedFloat32x16:
-               return rewriteValueAMD64_OpScaleMaskedFloat32x16(v)
-       case OpScaleMaskedFloat32x4:
-               return rewriteValueAMD64_OpScaleMaskedFloat32x4(v)
-       case OpScaleMaskedFloat32x8:
-               return rewriteValueAMD64_OpScaleMaskedFloat32x8(v)
-       case OpScaleMaskedFloat64x2:
-               return rewriteValueAMD64_OpScaleMaskedFloat64x2(v)
-       case OpScaleMaskedFloat64x4:
-               return rewriteValueAMD64_OpScaleMaskedFloat64x4(v)
-       case OpScaleMaskedFloat64x8:
-               return rewriteValueAMD64_OpScaleMaskedFloat64x8(v)
        case OpSelect0:
                return rewriteValueAMD64_OpSelect0(v)
        case OpSelect1:
@@ -4688,42 +3380,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftAllLeftConcatInt64x8:
                v.Op = OpAMD64VPSHLDQ512
                return true
-       case OpShiftAllLeftConcatMaskedInt16x16:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x16(v)
-       case OpShiftAllLeftConcatMaskedInt16x32:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x32(v)
-       case OpShiftAllLeftConcatMaskedInt16x8:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x8(v)
-       case OpShiftAllLeftConcatMaskedInt32x16:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x16(v)
-       case OpShiftAllLeftConcatMaskedInt32x4:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x4(v)
-       case OpShiftAllLeftConcatMaskedInt32x8:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x8(v)
-       case OpShiftAllLeftConcatMaskedInt64x2:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x2(v)
-       case OpShiftAllLeftConcatMaskedInt64x4:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x4(v)
-       case OpShiftAllLeftConcatMaskedInt64x8:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x8(v)
-       case OpShiftAllLeftConcatMaskedUint16x16:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x16(v)
-       case OpShiftAllLeftConcatMaskedUint16x32:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x32(v)
-       case OpShiftAllLeftConcatMaskedUint16x8:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x8(v)
-       case OpShiftAllLeftConcatMaskedUint32x16:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x16(v)
-       case OpShiftAllLeftConcatMaskedUint32x4:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x4(v)
-       case OpShiftAllLeftConcatMaskedUint32x8:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x8(v)
-       case OpShiftAllLeftConcatMaskedUint64x2:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x2(v)
-       case OpShiftAllLeftConcatMaskedUint64x4:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x4(v)
-       case OpShiftAllLeftConcatMaskedUint64x8:
-               return rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x8(v)
        case OpShiftAllLeftConcatUint16x16:
                v.Op = OpAMD64VPSHLDW256
                return true
@@ -4778,42 +3434,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftAllLeftInt64x8:
                v.Op = OpAMD64VPSLLQ512
                return true
-       case OpShiftAllLeftMaskedInt16x16:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v)
-       case OpShiftAllLeftMaskedInt16x32:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v)
-       case OpShiftAllLeftMaskedInt16x8:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v)
-       case OpShiftAllLeftMaskedInt32x16:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v)
-       case OpShiftAllLeftMaskedInt32x4:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v)
-       case OpShiftAllLeftMaskedInt32x8:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v)
-       case OpShiftAllLeftMaskedInt64x2:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v)
-       case OpShiftAllLeftMaskedInt64x4:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v)
-       case OpShiftAllLeftMaskedInt64x8:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v)
-       case OpShiftAllLeftMaskedUint16x16:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v)
-       case OpShiftAllLeftMaskedUint16x32:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v)
-       case OpShiftAllLeftMaskedUint16x8:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v)
-       case OpShiftAllLeftMaskedUint32x16:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v)
-       case OpShiftAllLeftMaskedUint32x4:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v)
-       case OpShiftAllLeftMaskedUint32x8:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v)
-       case OpShiftAllLeftMaskedUint64x2:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v)
-       case OpShiftAllLeftMaskedUint64x4:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x4(v)
-       case OpShiftAllLeftMaskedUint64x8:
-               return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v)
        case OpShiftAllLeftUint16x16:
                v.Op = OpAMD64VPSLLW256
                return true
@@ -4868,42 +3488,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftAllRightConcatInt64x8:
                v.Op = OpAMD64VPSHRDQ512
                return true
-       case OpShiftAllRightConcatMaskedInt16x16:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v)
-       case OpShiftAllRightConcatMaskedInt16x32:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v)
-       case OpShiftAllRightConcatMaskedInt16x8:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v)
-       case OpShiftAllRightConcatMaskedInt32x16:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v)
-       case OpShiftAllRightConcatMaskedInt32x4:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x4(v)
-       case OpShiftAllRightConcatMaskedInt32x8:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x8(v)
-       case OpShiftAllRightConcatMaskedInt64x2:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x2(v)
-       case OpShiftAllRightConcatMaskedInt64x4:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x4(v)
-       case OpShiftAllRightConcatMaskedInt64x8:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x8(v)
-       case OpShiftAllRightConcatMaskedUint16x16:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x16(v)
-       case OpShiftAllRightConcatMaskedUint16x32:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x32(v)
-       case OpShiftAllRightConcatMaskedUint16x8:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x8(v)
-       case OpShiftAllRightConcatMaskedUint32x16:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x16(v)
-       case OpShiftAllRightConcatMaskedUint32x4:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x4(v)
-       case OpShiftAllRightConcatMaskedUint32x8:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x8(v)
-       case OpShiftAllRightConcatMaskedUint64x2:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x2(v)
-       case OpShiftAllRightConcatMaskedUint64x4:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x4(v)
-       case OpShiftAllRightConcatMaskedUint64x8:
-               return rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v)
        case OpShiftAllRightConcatUint16x16:
                v.Op = OpAMD64VPSHRDW256
                return true
@@ -4958,42 +3542,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftAllRightInt64x8:
                v.Op = OpAMD64VPSRAQ512
                return true
-       case OpShiftAllRightMaskedInt16x16:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v)
-       case OpShiftAllRightMaskedInt16x32:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt16x32(v)
-       case OpShiftAllRightMaskedInt16x8:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt16x8(v)
-       case OpShiftAllRightMaskedInt32x16:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt32x16(v)
-       case OpShiftAllRightMaskedInt32x4:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt32x4(v)
-       case OpShiftAllRightMaskedInt32x8:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt32x8(v)
-       case OpShiftAllRightMaskedInt64x2:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v)
-       case OpShiftAllRightMaskedInt64x4:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v)
-       case OpShiftAllRightMaskedInt64x8:
-               return rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v)
-       case OpShiftAllRightMaskedUint16x16:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint16x16(v)
-       case OpShiftAllRightMaskedUint16x32:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint16x32(v)
-       case OpShiftAllRightMaskedUint16x8:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint16x8(v)
-       case OpShiftAllRightMaskedUint32x16:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint32x16(v)
-       case OpShiftAllRightMaskedUint32x4:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint32x4(v)
-       case OpShiftAllRightMaskedUint32x8:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint32x8(v)
-       case OpShiftAllRightMaskedUint64x2:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v)
-       case OpShiftAllRightMaskedUint64x4:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v)
-       case OpShiftAllRightMaskedUint64x8:
-               return rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v)
        case OpShiftAllRightUint16x16:
                v.Op = OpAMD64VPSRLW256
                return true
@@ -5048,42 +3596,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftLeftConcatInt64x8:
                v.Op = OpAMD64VPSHLDVQ512
                return true
-       case OpShiftLeftConcatMaskedInt16x16:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x16(v)
-       case OpShiftLeftConcatMaskedInt16x32:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x32(v)
-       case OpShiftLeftConcatMaskedInt16x8:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x8(v)
-       case OpShiftLeftConcatMaskedInt32x16:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x16(v)
-       case OpShiftLeftConcatMaskedInt32x4:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x4(v)
-       case OpShiftLeftConcatMaskedInt32x8:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x8(v)
-       case OpShiftLeftConcatMaskedInt64x2:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x2(v)
-       case OpShiftLeftConcatMaskedInt64x4:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x4(v)
-       case OpShiftLeftConcatMaskedInt64x8:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x8(v)
-       case OpShiftLeftConcatMaskedUint16x16:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x16(v)
-       case OpShiftLeftConcatMaskedUint16x32:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x32(v)
-       case OpShiftLeftConcatMaskedUint16x8:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x8(v)
-       case OpShiftLeftConcatMaskedUint32x16:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x16(v)
-       case OpShiftLeftConcatMaskedUint32x4:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x4(v)
-       case OpShiftLeftConcatMaskedUint32x8:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x8(v)
-       case OpShiftLeftConcatMaskedUint64x2:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x2(v)
-       case OpShiftLeftConcatMaskedUint64x4:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x4(v)
-       case OpShiftLeftConcatMaskedUint64x8:
-               return rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x8(v)
        case OpShiftLeftConcatUint16x16:
                v.Op = OpAMD64VPSHLDVW256
                return true
@@ -5138,42 +3650,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftLeftInt64x8:
                v.Op = OpAMD64VPSLLVQ512
                return true
-       case OpShiftLeftMaskedInt16x16:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt16x16(v)
-       case OpShiftLeftMaskedInt16x32:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt16x32(v)
-       case OpShiftLeftMaskedInt16x8:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt16x8(v)
-       case OpShiftLeftMaskedInt32x16:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt32x16(v)
-       case OpShiftLeftMaskedInt32x4:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt32x4(v)
-       case OpShiftLeftMaskedInt32x8:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt32x8(v)
-       case OpShiftLeftMaskedInt64x2:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt64x2(v)
-       case OpShiftLeftMaskedInt64x4:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt64x4(v)
-       case OpShiftLeftMaskedInt64x8:
-               return rewriteValueAMD64_OpShiftLeftMaskedInt64x8(v)
-       case OpShiftLeftMaskedUint16x16:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint16x16(v)
-       case OpShiftLeftMaskedUint16x32:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint16x32(v)
-       case OpShiftLeftMaskedUint16x8:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint16x8(v)
-       case OpShiftLeftMaskedUint32x16:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint32x16(v)
-       case OpShiftLeftMaskedUint32x4:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint32x4(v)
-       case OpShiftLeftMaskedUint32x8:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint32x8(v)
-       case OpShiftLeftMaskedUint64x2:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint64x2(v)
-       case OpShiftLeftMaskedUint64x4:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint64x4(v)
-       case OpShiftLeftMaskedUint64x8:
-               return rewriteValueAMD64_OpShiftLeftMaskedUint64x8(v)
        case OpShiftLeftUint16x16:
                v.Op = OpAMD64VPSLLVW256
                return true
@@ -5228,42 +3704,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftRightConcatInt64x8:
                v.Op = OpAMD64VPSHRDVQ512
                return true
-       case OpShiftRightConcatMaskedInt16x16:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt16x16(v)
-       case OpShiftRightConcatMaskedInt16x32:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt16x32(v)
-       case OpShiftRightConcatMaskedInt16x8:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt16x8(v)
-       case OpShiftRightConcatMaskedInt32x16:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt32x16(v)
-       case OpShiftRightConcatMaskedInt32x4:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt32x4(v)
-       case OpShiftRightConcatMaskedInt32x8:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt32x8(v)
-       case OpShiftRightConcatMaskedInt64x2:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt64x2(v)
-       case OpShiftRightConcatMaskedInt64x4:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt64x4(v)
-       case OpShiftRightConcatMaskedInt64x8:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedInt64x8(v)
-       case OpShiftRightConcatMaskedUint16x16:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint16x16(v)
-       case OpShiftRightConcatMaskedUint16x32:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint16x32(v)
-       case OpShiftRightConcatMaskedUint16x8:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint16x8(v)
-       case OpShiftRightConcatMaskedUint32x16:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint32x16(v)
-       case OpShiftRightConcatMaskedUint32x4:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint32x4(v)
-       case OpShiftRightConcatMaskedUint32x8:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint32x8(v)
-       case OpShiftRightConcatMaskedUint64x2:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint64x2(v)
-       case OpShiftRightConcatMaskedUint64x4:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint64x4(v)
-       case OpShiftRightConcatMaskedUint64x8:
-               return rewriteValueAMD64_OpShiftRightConcatMaskedUint64x8(v)
        case OpShiftRightConcatUint16x16:
                v.Op = OpAMD64VPSHRDVW256
                return true
@@ -5318,42 +3758,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpShiftRightInt64x8:
                v.Op = OpAMD64VPSRAVQ512
                return true
-       case OpShiftRightMaskedInt16x16:
-               return rewriteValueAMD64_OpShiftRightMaskedInt16x16(v)
-       case OpShiftRightMaskedInt16x32:
-               return rewriteValueAMD64_OpShiftRightMaskedInt16x32(v)
-       case OpShiftRightMaskedInt16x8:
-               return rewriteValueAMD64_OpShiftRightMaskedInt16x8(v)
-       case OpShiftRightMaskedInt32x16:
-               return rewriteValueAMD64_OpShiftRightMaskedInt32x16(v)
-       case OpShiftRightMaskedInt32x4:
-               return rewriteValueAMD64_OpShiftRightMaskedInt32x4(v)
-       case OpShiftRightMaskedInt32x8:
-               return rewriteValueAMD64_OpShiftRightMaskedInt32x8(v)
-       case OpShiftRightMaskedInt64x2:
-               return rewriteValueAMD64_OpShiftRightMaskedInt64x2(v)
-       case OpShiftRightMaskedInt64x4:
-               return rewriteValueAMD64_OpShiftRightMaskedInt64x4(v)
-       case OpShiftRightMaskedInt64x8:
-               return rewriteValueAMD64_OpShiftRightMaskedInt64x8(v)
-       case OpShiftRightMaskedUint16x16:
-               return rewriteValueAMD64_OpShiftRightMaskedUint16x16(v)
-       case OpShiftRightMaskedUint16x32:
-               return rewriteValueAMD64_OpShiftRightMaskedUint16x32(v)
-       case OpShiftRightMaskedUint16x8:
-               return rewriteValueAMD64_OpShiftRightMaskedUint16x8(v)
-       case OpShiftRightMaskedUint32x16:
-               return rewriteValueAMD64_OpShiftRightMaskedUint32x16(v)
-       case OpShiftRightMaskedUint32x4:
-               return rewriteValueAMD64_OpShiftRightMaskedUint32x4(v)
-       case OpShiftRightMaskedUint32x8:
-               return rewriteValueAMD64_OpShiftRightMaskedUint32x8(v)
-       case OpShiftRightMaskedUint64x2:
-               return rewriteValueAMD64_OpShiftRightMaskedUint64x2(v)
-       case OpShiftRightMaskedUint64x4:
-               return rewriteValueAMD64_OpShiftRightMaskedUint64x4(v)
-       case OpShiftRightMaskedUint64x8:
-               return rewriteValueAMD64_OpShiftRightMaskedUint64x8(v)
        case OpShiftRightUint16x16:
                v.Op = OpAMD64VPSRLVW256
                return true
@@ -5429,18 +3833,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpSqrtFloat64x8:
                v.Op = OpAMD64VSQRTPD512
                return true
-       case OpSqrtMaskedFloat32x16:
-               return rewriteValueAMD64_OpSqrtMaskedFloat32x16(v)
-       case OpSqrtMaskedFloat32x4:
-               return rewriteValueAMD64_OpSqrtMaskedFloat32x4(v)
-       case OpSqrtMaskedFloat32x8:
-               return rewriteValueAMD64_OpSqrtMaskedFloat32x8(v)
-       case OpSqrtMaskedFloat64x2:
-               return rewriteValueAMD64_OpSqrtMaskedFloat64x2(v)
-       case OpSqrtMaskedFloat64x4:
-               return rewriteValueAMD64_OpSqrtMaskedFloat64x4(v)
-       case OpSqrtMaskedFloat64x8:
-               return rewriteValueAMD64_OpSqrtMaskedFloat64x8(v)
        case OpStaticCall:
                v.Op = OpAMD64CALLstatic
                return true
@@ -5550,66 +3942,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpSubInt8x64:
                v.Op = OpAMD64VPSUBB512
                return true
-       case OpSubMaskedFloat32x16:
-               return rewriteValueAMD64_OpSubMaskedFloat32x16(v)
-       case OpSubMaskedFloat32x4:
-               return rewriteValueAMD64_OpSubMaskedFloat32x4(v)
-       case OpSubMaskedFloat32x8:
-               return rewriteValueAMD64_OpSubMaskedFloat32x8(v)
-       case OpSubMaskedFloat64x2:
-               return rewriteValueAMD64_OpSubMaskedFloat64x2(v)
-       case OpSubMaskedFloat64x4:
-               return rewriteValueAMD64_OpSubMaskedFloat64x4(v)
-       case OpSubMaskedFloat64x8:
-               return rewriteValueAMD64_OpSubMaskedFloat64x8(v)
-       case OpSubMaskedInt16x16:
-               return rewriteValueAMD64_OpSubMaskedInt16x16(v)
-       case OpSubMaskedInt16x32:
-               return rewriteValueAMD64_OpSubMaskedInt16x32(v)
-       case OpSubMaskedInt16x8:
-               return rewriteValueAMD64_OpSubMaskedInt16x8(v)
-       case OpSubMaskedInt32x16:
-               return rewriteValueAMD64_OpSubMaskedInt32x16(v)
-       case OpSubMaskedInt32x4:
-               return rewriteValueAMD64_OpSubMaskedInt32x4(v)
-       case OpSubMaskedInt32x8:
-               return rewriteValueAMD64_OpSubMaskedInt32x8(v)
-       case OpSubMaskedInt64x2:
-               return rewriteValueAMD64_OpSubMaskedInt64x2(v)
-       case OpSubMaskedInt64x4:
-               return rewriteValueAMD64_OpSubMaskedInt64x4(v)
-       case OpSubMaskedInt64x8:
-               return rewriteValueAMD64_OpSubMaskedInt64x8(v)
-       case OpSubMaskedInt8x16:
-               return rewriteValueAMD64_OpSubMaskedInt8x16(v)
-       case OpSubMaskedInt8x32:
-               return rewriteValueAMD64_OpSubMaskedInt8x32(v)
-       case OpSubMaskedInt8x64:
-               return rewriteValueAMD64_OpSubMaskedInt8x64(v)
-       case OpSubMaskedUint16x16:
-               return rewriteValueAMD64_OpSubMaskedUint16x16(v)
-       case OpSubMaskedUint16x32:
-               return rewriteValueAMD64_OpSubMaskedUint16x32(v)
-       case OpSubMaskedUint16x8:
-               return rewriteValueAMD64_OpSubMaskedUint16x8(v)
-       case OpSubMaskedUint32x16:
-               return rewriteValueAMD64_OpSubMaskedUint32x16(v)
-       case OpSubMaskedUint32x4:
-               return rewriteValueAMD64_OpSubMaskedUint32x4(v)
-       case OpSubMaskedUint32x8:
-               return rewriteValueAMD64_OpSubMaskedUint32x8(v)
-       case OpSubMaskedUint64x2:
-               return rewriteValueAMD64_OpSubMaskedUint64x2(v)
-       case OpSubMaskedUint64x4:
-               return rewriteValueAMD64_OpSubMaskedUint64x4(v)
-       case OpSubMaskedUint64x8:
-               return rewriteValueAMD64_OpSubMaskedUint64x8(v)
-       case OpSubMaskedUint8x16:
-               return rewriteValueAMD64_OpSubMaskedUint8x16(v)
-       case OpSubMaskedUint8x32:
-               return rewriteValueAMD64_OpSubMaskedUint8x32(v)
-       case OpSubMaskedUint8x64:
-               return rewriteValueAMD64_OpSubMaskedUint8x64(v)
        case OpSubPairsFloat32x4:
                v.Op = OpAMD64VHSUBPS128
                return true
@@ -5673,30 +4005,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpSubSaturatedInt8x64:
                v.Op = OpAMD64VPSUBSB512
                return true
-       case OpSubSaturatedMaskedInt16x16:
-               return rewriteValueAMD64_OpSubSaturatedMaskedInt16x16(v)
-       case OpSubSaturatedMaskedInt16x32:
-               return rewriteValueAMD64_OpSubSaturatedMaskedInt16x32(v)
-       case OpSubSaturatedMaskedInt16x8:
-               return rewriteValueAMD64_OpSubSaturatedMaskedInt16x8(v)
-       case OpSubSaturatedMaskedInt8x16:
-               return rewriteValueAMD64_OpSubSaturatedMaskedInt8x16(v)
-       case OpSubSaturatedMaskedInt8x32:
-               return rewriteValueAMD64_OpSubSaturatedMaskedInt8x32(v)
-       case OpSubSaturatedMaskedInt8x64:
-               return rewriteValueAMD64_OpSubSaturatedMaskedInt8x64(v)
-       case OpSubSaturatedMaskedUint16x16:
-               return rewriteValueAMD64_OpSubSaturatedMaskedUint16x16(v)
-       case OpSubSaturatedMaskedUint16x32:
-               return rewriteValueAMD64_OpSubSaturatedMaskedUint16x32(v)
-       case OpSubSaturatedMaskedUint16x8:
-               return rewriteValueAMD64_OpSubSaturatedMaskedUint16x8(v)
-       case OpSubSaturatedMaskedUint8x16:
-               return rewriteValueAMD64_OpSubSaturatedMaskedUint8x16(v)
-       case OpSubSaturatedMaskedUint8x32:
-               return rewriteValueAMD64_OpSubSaturatedMaskedUint8x32(v)
-       case OpSubSaturatedMaskedUint8x64:
-               return rewriteValueAMD64_OpSubSaturatedMaskedUint8x64(v)
        case OpSubSaturatedUint16x16:
                v.Op = OpAMD64VPSUBUSW256
                return true
@@ -5794,18 +4102,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpTruncScaledFloat64x4(v)
        case OpTruncScaledFloat64x8:
                return rewriteValueAMD64_OpTruncScaledFloat64x8(v)
-       case OpTruncScaledMaskedFloat32x16:
-               return rewriteValueAMD64_OpTruncScaledMaskedFloat32x16(v)
-       case OpTruncScaledMaskedFloat32x4:
-               return rewriteValueAMD64_OpTruncScaledMaskedFloat32x4(v)
-       case OpTruncScaledMaskedFloat32x8:
-               return rewriteValueAMD64_OpTruncScaledMaskedFloat32x8(v)
-       case OpTruncScaledMaskedFloat64x2:
-               return rewriteValueAMD64_OpTruncScaledMaskedFloat64x2(v)
-       case OpTruncScaledMaskedFloat64x4:
-               return rewriteValueAMD64_OpTruncScaledMaskedFloat64x4(v)
-       case OpTruncScaledMaskedFloat64x8:
-               return rewriteValueAMD64_OpTruncScaledMaskedFloat64x8(v)
        case OpTruncScaledResidueFloat32x16:
                return rewriteValueAMD64_OpTruncScaledResidueFloat32x16(v)
        case OpTruncScaledResidueFloat32x4:
@@ -5818,18 +4114,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpTruncScaledResidueFloat64x4(v)
        case OpTruncScaledResidueFloat64x8:
                return rewriteValueAMD64_OpTruncScaledResidueFloat64x8(v)
-       case OpTruncScaledResidueMaskedFloat32x16:
-               return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x16(v)
-       case OpTruncScaledResidueMaskedFloat32x4:
-               return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x4(v)
-       case OpTruncScaledResidueMaskedFloat32x8:
-               return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x8(v)
-       case OpTruncScaledResidueMaskedFloat64x2:
-               return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x2(v)
-       case OpTruncScaledResidueMaskedFloat64x4:
-               return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x4(v)
-       case OpTruncScaledResidueMaskedFloat64x8:
-               return rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x8(v)
        case OpWB:
                v.Op = OpAMD64LoweredWB
                return true
@@ -5881,30 +4165,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpXorInt8x64:
                v.Op = OpAMD64VPXORD512
                return true
-       case OpXorMaskedInt32x16:
-               return rewriteValueAMD64_OpXorMaskedInt32x16(v)
-       case OpXorMaskedInt32x4:
-               return rewriteValueAMD64_OpXorMaskedInt32x4(v)
-       case OpXorMaskedInt32x8:
-               return rewriteValueAMD64_OpXorMaskedInt32x8(v)
-       case OpXorMaskedInt64x2:
-               return rewriteValueAMD64_OpXorMaskedInt64x2(v)
-       case OpXorMaskedInt64x4:
-               return rewriteValueAMD64_OpXorMaskedInt64x4(v)
-       case OpXorMaskedInt64x8:
-               return rewriteValueAMD64_OpXorMaskedInt64x8(v)
-       case OpXorMaskedUint32x16:
-               return rewriteValueAMD64_OpXorMaskedUint32x16(v)
-       case OpXorMaskedUint32x4:
-               return rewriteValueAMD64_OpXorMaskedUint32x4(v)
-       case OpXorMaskedUint32x8:
-               return rewriteValueAMD64_OpXorMaskedUint32x8(v)
-       case OpXorMaskedUint64x2:
-               return rewriteValueAMD64_OpXorMaskedUint64x2(v)
-       case OpXorMaskedUint64x4:
-               return rewriteValueAMD64_OpXorMaskedUint64x4(v)
-       case OpXorMaskedUint64x8:
-               return rewriteValueAMD64_OpXorMaskedUint64x8(v)
        case OpXorUint16x16:
                v.Op = OpAMD64VPXOR256
                return true
@@ -27893,66 +26153,6 @@ func rewriteValueAMD64_OpAMD64VPSLLD512(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64VPSLLDMasked128(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLDMasked128 x (MOVQconst [c]) mask)
-       // result: (VPSLLDMasked128const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSLLDMasked256(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLDMasked256 x (MOVQconst [c]) mask)
-       // result: (VPSLLDMasked256const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSLLDMasked512(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLDMasked512 x (MOVQconst [c]) mask)
-       // result: (VPSLLDMasked512const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64VPSLLQ128(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -28007,66 +26207,6 @@ func rewriteValueAMD64_OpAMD64VPSLLQ512(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64VPSLLQMasked128(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLQMasked128 x (MOVQconst [c]) mask)
-       // result: (VPSLLQMasked128const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSLLQMasked256(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLQMasked256 x (MOVQconst [c]) mask)
-       // result: (VPSLLQMasked256const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSLLQMasked512(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLQMasked512 x (MOVQconst [c]) mask)
-       // result: (VPSLLQMasked512const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64VPSLLW128(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -28121,66 +26261,6 @@ func rewriteValueAMD64_OpAMD64VPSLLW512(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64VPSLLWMasked128(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLWMasked128 x (MOVQconst [c]) mask)
-       // result: (VPSLLWMasked128const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSLLWMasked256(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLWMasked256 x (MOVQconst [c]) mask)
-       // result: (VPSLLWMasked256const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSLLWMasked512(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSLLWMasked512 x (MOVQconst [c]) mask)
-       // result: (VPSLLWMasked512const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64VPSRAD128(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -28235,66 +26315,6 @@ func rewriteValueAMD64_OpAMD64VPSRAD512(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64VPSRADMasked128(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRADMasked128 x (MOVQconst [c]) mask)
-       // result: (VPSRADMasked128const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRADMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSRADMasked256(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRADMasked256 x (MOVQconst [c]) mask)
-       // result: (VPSRADMasked256const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRADMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSRADMasked512(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRADMasked512 x (MOVQconst [c]) mask)
-       // result: (VPSRADMasked512const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRADMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64VPSRAQ128(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -28349,66 +26369,6 @@ func rewriteValueAMD64_OpAMD64VPSRAQ512(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64VPSRAQMasked128(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRAQMasked128 x (MOVQconst [c]) mask)
-       // result: (VPSRAQMasked128const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRAQMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSRAQMasked256(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRAQMasked256 x (MOVQconst [c]) mask)
-       // result: (VPSRAQMasked256const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRAQMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSRAQMasked512(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRAQMasked512 x (MOVQconst [c]) mask)
-       // result: (VPSRAQMasked512const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRAQMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64VPSRAW128(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -28463,66 +26423,6 @@ func rewriteValueAMD64_OpAMD64VPSRAW512(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64VPSRAWMasked128(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRAWMasked128 x (MOVQconst [c]) mask)
-       // result: (VPSRAWMasked128const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRAWMasked128const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSRAWMasked256(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRAWMasked256 x (MOVQconst [c]) mask)
-       // result: (VPSRAWMasked256const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRAWMasked256const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPSRAWMasked512 x (MOVQconst [c]) mask)
-       // result: (VPSRAWMasked512const [uint8(c)] x mask)
-       for {
-               x := v_0
-               if v_1.Op != OpAMD64MOVQconst {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               mask := v_2
-               v.reset(OpAMD64VPSRAWMasked512const)
-               v.AuxInt = uint8ToAuxInt(uint8(c))
-               v.AddArg2(x, mask)
-               return true
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64XADDLlock(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
@@ -29423,27011 +27323,11273 @@ func rewriteValueAMD64_OpAMD64XORQmodify(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAbsMaskedInt16x16(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpAddr(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt16x16 x mask)
-       // result: (VPABSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Addr {sym} base)
+       // result: (LEAQ {sym} base)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               sym := auxToSym(v.Aux)
+               base := v_0
+               v.reset(OpAMD64LEAQ)
+               v.Aux = symToAux(sym)
+               v.AddArg(base)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt16x32(v *Value) bool {
+func rewriteValueAMD64_OpAtomicAdd32(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AbsMaskedInt16x32 x mask)
-       // result: (VPABSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (AtomicAdd32 ptr val mem)
+       // result: (AddTupleFirst32 val (XADDLlock val ptr mem))
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64AddTupleFirst32)
+               v0 := b.NewValue0(v.Pos, OpAMD64XADDLlock, types.NewTuple(typ.UInt32, types.TypeMem))
+               v0.AddArg3(val, ptr, mem)
+               v.AddArg2(val, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt16x8(v *Value) bool {
+func rewriteValueAMD64_OpAtomicAdd64(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AbsMaskedInt16x8 x mask)
-       // result: (VPABSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (AtomicAdd64 ptr val mem)
+       // result: (AddTupleFirst64 val (XADDQlock val ptr mem))
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64AddTupleFirst64)
+               v0 := b.NewValue0(v.Pos, OpAMD64XADDQlock, types.NewTuple(typ.UInt64, types.TypeMem))
+               v0.AddArg3(val, ptr, mem)
+               v.AddArg2(val, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpAtomicAnd32(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt32x16 x mask)
-       // result: (VPABSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (AtomicAnd32 ptr val mem)
+       // result: (ANDLlock ptr val mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64ANDLlock)
+               v.AddArg3(ptr, val, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt32x4(v *Value) bool {
+func rewriteValueAMD64_OpAtomicAnd32value(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt32x4 x mask)
-       // result: (VPABSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (AtomicAnd32value ptr val mem)
+       // result: (LoweredAtomicAnd32 ptr val mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64LoweredAtomicAnd32)
+               v.AddArg3(ptr, val, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpAtomicAnd64value(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt32x8 x mask)
-       // result: (VPABSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (AtomicAnd64value ptr val mem)
+       // result: (LoweredAtomicAnd64 ptr val mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64LoweredAtomicAnd64)
+               v.AddArg3(ptr, val, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt64x2(v *Value) bool {
+func rewriteValueAMD64_OpAtomicAnd8(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt64x2 x mask)
-       // result: (VPABSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (AtomicAnd8 ptr val mem)
+       // result: (ANDBlock ptr val mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64ANDBlock)
+               v.AddArg3(ptr, val, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpAtomicCompareAndSwap32(v *Value) bool {
+       v_3 := v.Args[3]
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt64x4 x mask)
-       // result: (VPABSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (AtomicCompareAndSwap32 ptr old new_ mem)
+       // result: (CMPXCHGLlock ptr old new_ mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               old := v_1
+               new_ := v_2
+               mem := v_3
+               v.reset(OpAMD64CMPXCHGLlock)
+               v.AddArg4(ptr, old, new_, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpAtomicCompareAndSwap64(v *Value) bool {
+       v_3 := v.Args[3]
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt64x8 x mask)
-       // result: (VPABSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (AtomicCompareAndSwap64 ptr old new_ mem)
+       // result: (CMPXCHGQlock ptr old new_ mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               old := v_1
+               new_ := v_2
+               mem := v_3
+               v.reset(OpAMD64CMPXCHGQlock)
+               v.AddArg4(ptr, old, new_, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt8x16(v *Value) bool {
+func rewriteValueAMD64_OpAtomicExchange32(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt8x16 x mask)
-       // result: (VPABSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (AtomicExchange32 ptr val mem)
+       // result: (XCHGL val ptr mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64XCHGL)
+               v.AddArg3(val, ptr, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt8x32(v *Value) bool {
+func rewriteValueAMD64_OpAtomicExchange64(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt8x32 x mask)
-       // result: (VPABSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (AtomicExchange64 ptr val mem)
+       // result: (XCHGQ val ptr mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64XCHGQ)
+               v.AddArg3(val, ptr, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAbsMaskedInt8x64(v *Value) bool {
+func rewriteValueAMD64_OpAtomicExchange8(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AbsMaskedInt8x64 x mask)
-       // result: (VPABSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (AtomicExchange8 ptr val mem)
+       // result: (XCHGB val ptr mem)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPABSBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64XCHGB)
+               v.AddArg3(val, ptr, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpAtomicLoad32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddDotProdPairsSaturatedMaskedInt32x16 x y z mask)
-       // result: (VPDPWSSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (AtomicLoad32 ptr mem)
+       // result: (MOVLatomicload ptr mem)
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPWSSDSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64MOVLatomicload)
+               v.AddArg2(ptr, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpAtomicLoad64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddDotProdPairsSaturatedMaskedInt32x4 x y z mask)
-       // result: (VPDPWSSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (AtomicLoad64 ptr mem)
+       // result: (MOVQatomicload ptr mem)
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPWSSDSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64MOVQatomicload)
+               v.AddArg2(ptr, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdPairsSaturatedMaskedInt32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpAtomicLoad8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddDotProdPairsSaturatedMaskedInt32x8 x y z mask)
-       // result: (VPDPWSSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (AtomicLoad8 ptr mem)
+       // result: (MOVBatomicload ptr mem)
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPWSSDSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64MOVBatomicload)
+               v.AddArg2(ptr, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpAtomicLoadPtr(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddDotProdQuadrupleMaskedInt32x16 x y z mask)
-       // result: (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (AtomicLoadPtr ptr mem)
+       // result: (MOVQatomicload ptr mem)
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPBUSDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64MOVQatomicload)
+               v.AddArg2(ptr, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x4(v *Value) bool {
-       v_3 := v.Args[3]
+func rewriteValueAMD64_OpAtomicOr32(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddDotProdQuadrupleMaskedInt32x4 x y z mask)
-       // result: (VPDPBUSDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (AtomicOr32 ptr val mem)
+       // result: (ORLlock ptr val mem)
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPBUSDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64ORLlock)
+               v.AddArg3(ptr, val, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdQuadrupleMaskedInt32x8(v *Value) bool {
-       v_3 := v.Args[3]
+func rewriteValueAMD64_OpAtomicOr32value(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddDotProdQuadrupleMaskedInt32x8 x y z mask)
-       // result: (VPDPBUSDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (AtomicOr32value ptr val mem)
+       // result: (LoweredAtomicOr32 ptr val mem)
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPBUSDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64LoweredAtomicOr32)
+               v.AddArg3(ptr, val, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x16(v *Value) bool {
-       v_3 := v.Args[3]
+func rewriteValueAMD64_OpAtomicOr64value(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddDotProdQuadrupleSaturatedMaskedInt32x16 x y z mask)
-       // result: (VPDPBUSDSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (AtomicOr64value ptr val mem)
+       // result: (LoweredAtomicOr64 ptr val mem)
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPBUSDSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64LoweredAtomicOr64)
+               v.AddArg3(ptr, val, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x4(v *Value) bool {
-       v_3 := v.Args[3]
+func rewriteValueAMD64_OpAtomicOr8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddDotProdQuadrupleSaturatedMaskedInt32x4 x y z mask)
-       // result: (VPDPBUSDSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (AtomicOr8 ptr val mem)
+       // result: (ORBlock ptr val mem)
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPBUSDSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64ORBlock)
+               v.AddArg3(ptr, val, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpAddDotProdQuadrupleSaturatedMaskedInt32x8(v *Value) bool {
-       v_3 := v.Args[3]
+func rewriteValueAMD64_OpAtomicStore32(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddDotProdQuadrupleSaturatedMaskedInt32x8 x y z mask)
-       // result: (VPDPBUSDSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (AtomicStore32 ptr val mem)
+       // result: (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem))
        for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPDPBUSDSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64XCHGL, types.NewTuple(typ.UInt32, types.TypeMem))
+               v0.AddArg3(val, ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpAtomicStore64(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedFloat32x16 x y mask)
-       // result: (VADDPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (AtomicStore64 ptr val mem)
+       // result: (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem))
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VADDPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.UInt64, types.TypeMem))
+               v0.AddArg3(val, ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpAtomicStore8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedFloat32x4 x y mask)
-       // result: (VADDPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (AtomicStore8 ptr val mem)
+       // result: (Select1 (XCHGB <types.NewTuple(typ.UInt8,types.TypeMem)> val ptr mem))
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VADDPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64XCHGB, types.NewTuple(typ.UInt8, types.TypeMem))
+               v0.AddArg3(val, ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpAtomicStorePtrNoWB(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedFloat32x8 x y mask)
-       // result: (VADDPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (AtomicStorePtrNoWB ptr val mem)
+       // result: (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VADDPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.BytePtr, types.TypeMem))
+               v0.AddArg3(val, ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpBitLen16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedFloat64x2 x y mask)
-       // result: (VADDPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (BitLen16 x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (BSRL (LEAL1 <typ.UInt32> [1] (MOVWQZX <typ.UInt32> x) (MOVWQZX <typ.UInt32> x)))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VADDPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpAMD64BSRL)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL1, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(1)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt32)
+               v1.AddArg(x)
+               v0.AddArg2(v1, v1)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpAddMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedFloat64x4 x y mask)
-       // result: (VADDPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (BitLen16 <t> x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (NEGQ (ADDQconst <t> [-32] (LZCNTL (MOVWQZX <x.Type> x))))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VADDPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64NEGQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t)
+               v0.AuxInt = int32ToAuxInt(-32)
+               v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, x.Type)
+               v2.AddArg(x)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpAddMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpBitLen32(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedFloat64x8 x y mask)
-       // result: (VADDPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (BitLen32 x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (Select0 (BSRQ (LEAQ1 <typ.UInt64> [1] (MOVLQZX <typ.UInt64> x) (MOVLQZX <typ.UInt64> x))))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VADDPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v1 := b.NewValue0(v.Pos, OpAMD64LEAQ1, typ.UInt64)
+               v1.AuxInt = int32ToAuxInt(1)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64)
+               v2.AddArg(x)
+               v1.AddArg2(v2, v2)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpAddMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt16x16 x y mask)
-       // result: (VPADDWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (BitLen32 <t> x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (NEGQ (ADDQconst <t> [-32] (LZCNTL x)))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64NEGQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t)
+               v0.AuxInt = int32ToAuxInt(-32)
+               v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpAddMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpBitLen64(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedInt16x32 x y mask)
-       // result: (VPADDWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (BitLen64 <t> x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (ADDQconst [1] (CMOVQEQ <t> (Select0 <t> (BSRQ x)) (MOVQconst <t> [-1]) (Select1 <types.TypeFlags> (BSRQ x))))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpAMD64ADDQconst)
+               v.AuxInt = int32ToAuxInt(1)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMOVQEQ, t)
+               v1 := b.NewValue0(v.Pos, OpSelect0, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v2.AddArg(x)
+               v1.AddArg(v2)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t)
+               v3.AuxInt = int64ToAuxInt(-1)
+               v4 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v4.AddArg(v2)
+               v0.AddArg3(v1, v3, v4)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpAddMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt16x8 x y mask)
-       // result: (VPADDWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (BitLen64 <t> x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (NEGQ (ADDQconst <t> [-64] (LZCNTQ x)))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64NEGQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t)
+               v0.AuxInt = int32ToAuxInt(-64)
+               v1 := b.NewValue0(v.Pos, OpAMD64LZCNTQ, typ.UInt64)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpAddMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpBitLen8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedInt32x16 x y mask)
-       // result: (VPADDDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (BitLen8 x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (BSRL (LEAL1 <typ.UInt32> [1] (MOVBQZX <typ.UInt32> x) (MOVBQZX <typ.UInt32> x)))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpAMD64BSRL)
+               v0 := b.NewValue0(v.Pos, OpAMD64LEAL1, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(1)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt32)
+               v1.AddArg(x)
+               v0.AddArg2(v1, v1)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (BitLen8 <t> x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (NEGQ (ADDQconst <t> [-32] (LZCNTL (MOVBQZX <x.Type> x))))
+       for {
+               t := v.Type
+               x := v_0
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64NEGQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t)
+               v0.AuxInt = int32ToAuxInt(-32)
+               v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, x.Type)
+               v2.AddArg(x)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpAddMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpBswap16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt32x4 x y mask)
-       // result: (VPADDDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Bswap16 x)
+       // result: (ROLWconst [8] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64ROLWconst)
+               v.AuxInt = int8ToAuxInt(8)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeil(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt32x8 x y mask)
-       // result: (VPADDDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Ceil x)
+       // result: (ROUNDSD [2] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64ROUNDSD)
+               v.AuxInt = int8ToAuxInt(2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt64x2 x y mask)
-       // result: (VPADDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CeilFloat32x4 x)
+       // result: (VROUNDPS128 [2] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VROUNDPS128)
+               v.AuxInt = uint8ToAuxInt(2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt64x4 x y mask)
-       // result: (VPADDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CeilFloat32x8 x)
+       // result: (VROUNDPS256 [2] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VROUNDPS256)
+               v.AuxInt = uint8ToAuxInt(2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilFloat64x2(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt64x8 x y mask)
-       // result: (VPADDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (CeilFloat64x2 x)
+       // result: (VROUNDPD128 [2] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VROUNDPD128)
+               v.AuxInt = uint8ToAuxInt(2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt8x16 x y mask)
-       // result: (VPADDBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (CeilFloat64x4 x)
+       // result: (VROUNDPD256 [2] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VROUNDPD256)
+               v.AuxInt = uint8ToAuxInt(2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilScaledFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt8x32 x y mask)
-       // result: (VPADDBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (CeilScaledFloat32x16 [a] x)
+       // result: (VRNDSCALEPS512 [a+2] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS512)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilScaledFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedInt8x64 x y mask)
-       // result: (VPADDBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (CeilScaledFloat32x4 [a] x)
+       // result: (VRNDSCALEPS128 [a+2] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS128)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilScaledFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedUint16x16 x y mask)
-       // result: (VPADDWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (CeilScaledFloat32x8 [a] x)
+       // result: (VRNDSCALEPS256 [a+2] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS256)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilScaledFloat64x2(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedUint16x32 x y mask)
-       // result: (VPADDWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (CeilScaledFloat64x2 [a] x)
+       // result: (VRNDSCALEPD128 [a+2] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD128)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilScaledFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedUint16x8 x y mask)
-       // result: (VPADDWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (CeilScaledFloat64x4 [a] x)
+       // result: (VRNDSCALEPD256 [a+2] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD256)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilScaledFloat64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedUint32x16 x y mask)
-       // result: (VPADDDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (CeilScaledFloat64x8 [a] x)
+       // result: (VRNDSCALEPD512 [a+2] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD512)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilScaledResidueFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedUint32x4 x y mask)
-       // result: (VPADDDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (CeilScaledResidueFloat32x16 [a] x)
+       // result: (VREDUCEPS512 [a+2] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VREDUCEPS512)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCeilScaledResidueFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (AddMaskedUint32x8 x y mask)
-       // result: (VPADDDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (CeilScaledResidueFloat32x4 [a] x)
+       // result: (VREDUCEPS128 [a+2] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VREDUCEPS128)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCeilScaledResidueFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilScaledResidueFloat32x8 [a] x)
+       // result: (VREDUCEPS256 [a+2] x)
+       for {
+               a := auxIntToUint8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPS256)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilScaledResidueFloat64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilScaledResidueFloat64x2 [a] x)
+       // result: (VREDUCEPD128 [a+2] x)
+       for {
+               a := auxIntToUint8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD128)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilScaledResidueFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilScaledResidueFloat64x4 [a] x)
+       // result: (VREDUCEPD256 [a+2] x)
+       for {
+               a := auxIntToUint8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD256)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCeilScaledResidueFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (CeilScaledResidueFloat64x8 [a] x)
+       // result: (VREDUCEPD512 [a+2] x)
+       for {
+               a := auxIntToUint8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VREDUCEPD512)
+               v.AuxInt = uint8ToAuxInt(a + 2)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpCompressFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedUint64x2 x y mask)
-       // result: (VPADDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CompressFloat32x16 x mask)
+       // result: (VCOMPRESSPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VCOMPRESSPSMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressFloat32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedUint64x4 x y mask)
-       // result: (VPADDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CompressFloat32x4 x mask)
+       // result: (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VCOMPRESSPSMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedUint64x8 x y mask)
-       // result: (VPADDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (CompressFloat32x8 x mask)
+       // result: (VCOMPRESSPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VCOMPRESSPSMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedUint8x16 x y mask)
-       // result: (VPADDBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (CompressFloat64x2 x mask)
+       // result: (VCOMPRESSPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VCOMPRESSPDMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedUint8x32 x y mask)
-       // result: (VPADDBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (CompressFloat64x4 x mask)
+       // result: (VCOMPRESSPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VCOMPRESSPDMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddMaskedUint8x64 x y mask)
-       // result: (VPADDBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (CompressFloat64x8 x mask)
+       // result: (VCOMPRESSPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VCOMPRESSPDMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedInt16x16 x y mask)
-       // result: (VPADDSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (CompressInt16x16 x mask)
+       // result: (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDSWMasked256)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSWMasked256)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedInt16x32 x y mask)
-       // result: (VPADDSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (CompressInt16x32 x mask)
+       // result: (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDSWMasked512)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSWMasked512)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedInt16x8 x y mask)
-       // result: (VPADDSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (CompressInt16x8 x mask)
+       // result: (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDSWMasked128)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSWMasked128)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedInt8x16 x y mask)
-       // result: (VPADDSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (CompressInt32x16 x mask)
+       // result: (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDSBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSDMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedInt8x32 x y mask)
-       // result: (VPADDSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (CompressInt32x4 x mask)
+       // result: (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDSBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSDMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedInt8x64 x y mask)
-       // result: (VPADDSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (CompressInt32x8 x mask)
+       // result: (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDSBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSDMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedUint16x16 x y mask)
-       // result: (VPADDUSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (CompressInt64x2 x mask)
+       // result: (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDUSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSQMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedUint16x32 x y mask)
-       // result: (VPADDUSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (CompressInt64x4 x mask)
+       // result: (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDUSWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSQMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedUint16x8 x y mask)
-       // result: (VPADDUSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (CompressInt64x8 x mask)
+       // result: (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDUSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSQMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt8x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedUint8x16 x y mask)
-       // result: (VPADDUSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (CompressInt8x16 x mask)
+       // result: (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDUSBMasked128)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSBMasked128)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedUint8x32 x y mask)
-       // result: (VPADDUSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (CompressInt8x32 x mask)
+       // result: (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDUSBMasked256)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSBMasked256)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAddSaturatedMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AddSaturatedMaskedUint8x64 x y mask)
-       // result: (VPADDUSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
+       // match: (CompressInt8x64 x mask)
+       // result: (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+       for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPADDUSBMasked512)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSBMasked512)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAddr(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (Addr {sym} base)
-       // result: (LEAQ {sym} base)
-       for {
-               sym := auxToSym(v.Aux)
-               base := v_0
-               v.reset(OpAMD64LEAQ)
-               v.Aux = symToAux(sym)
-               v.AddArg(base)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedInt32x16 x y mask)
-       // result: (VPANDDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (CompressUint16x16 x mask)
+       // result: (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSWMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedInt32x4 x y mask)
-       // result: (VPANDDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (CompressUint16x32 x mask)
+       // result: (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSWMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedInt32x8 x y mask)
-       // result: (VPANDDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (CompressUint16x8 x mask)
+       // result: (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSWMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedInt64x2 x y mask)
-       // result: (VPANDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CompressUint32x16 x mask)
+       // result: (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSDMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedInt64x4 x y mask)
-       // result: (VPANDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CompressUint32x4 x mask)
+       // result: (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSDMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedInt64x8 x y mask)
-       // result: (VPANDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (CompressUint32x8 x mask)
+       // result: (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSDMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedUint32x16 x y mask)
-       // result: (VPANDDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (CompressUint64x2 x mask)
+       // result: (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSQMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedUint32x4 x y mask)
-       // result: (VPANDDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (CompressUint64x4 x mask)
+       // result: (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSQMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedUint32x8 x y mask)
-       // result: (VPANDDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (CompressUint64x8 x mask)
+       // result: (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSQMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint8x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedUint64x2 x y mask)
-       // result: (VPANDQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CompressUint8x16 x mask)
+       // result: (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSBMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedUint64x4 x y mask)
-       // result: (VPANDQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CompressUint8x32 x mask)
+       // result: (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSBMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpCompressUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndMaskedUint64x8 x y mask)
-       // result: (VPANDQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (CompressUint8x64 x mask)
+       // result: (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               mask := v_1
+               v.reset(OpAMD64VPCOMPRESSBMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpAndNotMaskedInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpCondSelect(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (AndNotMaskedInt32x16 x y mask)
-       // result: (VPANDNDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (CondSelect <t> x y (SETEQ cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQEQ y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETEQ {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQEQ)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedInt32x4 x y mask)
-       // result: (VPANDNDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETNE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQNE y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETNE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQNE)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedInt32x8 x y mask)
-       // result: (VPANDNDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETL cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQLT y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETL {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQLT)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedInt64x2 x y mask)
-       // result: (VPANDNQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETG cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQGT y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETG {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQGT)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedInt64x4 x y mask)
-       // result: (VPANDNQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETLE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQLE y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETLE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQLE)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedInt64x8 x y mask)
-       // result: (VPANDNQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETGE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQGE y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETGE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQGE)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedUint32x16 x y mask)
-       // result: (VPANDNDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETA cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQHI y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETA {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQHI)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedUint32x4 x y mask)
-       // result: (VPANDNDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETB cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQCS y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETB {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQCS)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedUint32x8 x y mask)
-       // result: (VPANDNDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETAE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQCC y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETAE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQCC)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedUint64x2 x y mask)
-       // result: (VPANDNQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETBE cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQLS y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETBE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQLS)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedUint64x4 x y mask)
-       // result: (VPANDNQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETEQF cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQEQF y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETEQF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQEQF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAndNotMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AndNotMaskedUint64x8 x y mask)
-       // result: (VPANDNQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETNEF cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQNEF y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPANDNQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETNEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQNEF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicAdd32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (AtomicAdd32 ptr val mem)
-       // result: (AddTupleFirst32 val (XADDLlock val ptr mem))
+       // match: (CondSelect <t> x y (SETGF cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQGTF y x cond)
        for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64AddTupleFirst32)
-               v0 := b.NewValue0(v.Pos, OpAMD64XADDLlock, types.NewTuple(typ.UInt32, types.TypeMem))
-               v0.AddArg3(val, ptr, mem)
-               v.AddArg2(val, v0)
+               t := v.Type
+               x := v_0
+               y := v_1
+               if v_2.Op != OpAMD64SETGF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQGTF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicAdd64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (AtomicAdd64 ptr val mem)
-       // result: (AddTupleFirst64 val (XADDQlock val ptr mem))
+       // match: (CondSelect <t> x y (SETGEF cond))
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQGEF y x cond)
        for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64AddTupleFirst64)
-               v0 := b.NewValue0(v.Pos, OpAMD64XADDQlock, types.NewTuple(typ.UInt64, types.TypeMem))
-               v0.AddArg3(val, ptr, mem)
-               v.AddArg2(val, v0)
+               t := v.Type
+               x := v_0
+               y := v_1
+               if v_2.Op != OpAMD64SETGEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQGEF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAtomicAnd32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicAnd32 ptr val mem)
-       // result: (ANDLlock ptr val mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64ANDLlock)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicAnd32value(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicAnd32value ptr val mem)
-       // result: (LoweredAtomicAnd32 ptr val mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64LoweredAtomicAnd32)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicAnd64value(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicAnd64value ptr val mem)
-       // result: (LoweredAtomicAnd64 ptr val mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64LoweredAtomicAnd64)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicAnd8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicAnd8 ptr val mem)
-       // result: (ANDBlock ptr val mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64ANDBlock)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicCompareAndSwap32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicCompareAndSwap32 ptr old new_ mem)
-       // result: (CMPXCHGLlock ptr old new_ mem)
-       for {
-               ptr := v_0
-               old := v_1
-               new_ := v_2
-               mem := v_3
-               v.reset(OpAMD64CMPXCHGLlock)
-               v.AddArg4(ptr, old, new_, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicCompareAndSwap64(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicCompareAndSwap64 ptr old new_ mem)
-       // result: (CMPXCHGQlock ptr old new_ mem)
-       for {
-               ptr := v_0
-               old := v_1
-               new_ := v_2
-               mem := v_3
-               v.reset(OpAMD64CMPXCHGQlock)
-               v.AddArg4(ptr, old, new_, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicExchange32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicExchange32 ptr val mem)
-       // result: (XCHGL val ptr mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64XCHGL)
-               v.AddArg3(val, ptr, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicExchange64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicExchange64 ptr val mem)
-       // result: (XCHGQ val ptr mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64XCHGQ)
-               v.AddArg3(val, ptr, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicExchange8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicExchange8 ptr val mem)
-       // result: (XCHGB val ptr mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64XCHGB)
-               v.AddArg3(val, ptr, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicLoad32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicLoad32 ptr mem)
-       // result: (MOVLatomicload ptr mem)
-       for {
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64MOVLatomicload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicLoad64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicLoad64 ptr mem)
-       // result: (MOVQatomicload ptr mem)
-       for {
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64MOVQatomicload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicLoad8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicLoad8 ptr mem)
-       // result: (MOVBatomicload ptr mem)
-       for {
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64MOVBatomicload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicLoadPtr(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicLoadPtr ptr mem)
-       // result: (MOVQatomicload ptr mem)
-       for {
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64MOVQatomicload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicOr32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicOr32 ptr val mem)
-       // result: (ORLlock ptr val mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64ORLlock)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicOr32value(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicOr32value ptr val mem)
-       // result: (LoweredAtomicOr32 ptr val mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64LoweredAtomicOr32)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicOr64value(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicOr64value ptr val mem)
-       // result: (LoweredAtomicOr64 ptr val mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64LoweredAtomicOr64)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicOr8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (AtomicOr8 ptr val mem)
-       // result: (ORBlock ptr val mem)
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64ORBlock)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicStore32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (AtomicStore32 ptr val mem)
-       // result: (Select1 (XCHGL <types.NewTuple(typ.UInt32,types.TypeMem)> val ptr mem))
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64XCHGL, types.NewTuple(typ.UInt32, types.TypeMem))
-               v0.AddArg3(val, ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicStore64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (AtomicStore64 ptr val mem)
-       // result: (Select1 (XCHGQ <types.NewTuple(typ.UInt64,types.TypeMem)> val ptr mem))
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.UInt64, types.TypeMem))
-               v0.AddArg3(val, ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicStore8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (AtomicStore8 ptr val mem)
-       // result: (Select1 (XCHGB <types.NewTuple(typ.UInt8,types.TypeMem)> val ptr mem))
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64XCHGB, types.NewTuple(typ.UInt8, types.TypeMem))
-               v0.AddArg3(val, ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAtomicStorePtrNoWB(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (AtomicStorePtrNoWB ptr val mem)
-       // result: (Select1 (XCHGQ <types.NewTuple(typ.BytePtr,types.TypeMem)> val ptr mem))
-       for {
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64XCHGQ, types.NewTuple(typ.BytePtr, types.TypeMem))
-               v0.AddArg3(val, ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpAverageMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AverageMaskedUint16x16 x y mask)
-       // result: (VPAVGWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETEQ cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLEQ y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPAVGWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETEQ {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLEQ)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAverageMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AverageMaskedUint16x32 x y mask)
-       // result: (VPAVGWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETNE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLNE y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPAVGWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETNE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLNE)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAverageMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AverageMaskedUint16x8 x y mask)
-       // result: (VPAVGWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETL cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLLT y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPAVGWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETL {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLLT)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAverageMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AverageMaskedUint8x16 x y mask)
-       // result: (VPAVGBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETG cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLGT y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPAVGBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETG {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLGT)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAverageMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AverageMaskedUint8x32 x y mask)
-       // result: (VPAVGBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETLE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLLE y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPAVGBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_2.Op != OpAMD64SETLE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLLE)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpAverageMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (AverageMaskedUint8x64 x y mask)
-       // result: (VPAVGBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETGE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLGE y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPAVGBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBitLen16(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (BitLen16 x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (BSRL (LEAL1 <typ.UInt32> [1] (MOVWQZX <typ.UInt32> x) (MOVWQZX <typ.UInt32> x)))
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
+               if v_2.Op != OpAMD64SETGE {
                        break
                }
-               v.reset(OpAMD64BSRL)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL1, typ.UInt32)
-               v0.AuxInt = int32ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt32)
-               v1.AddArg(x)
-               v0.AddArg2(v1, v1)
-               v.AddArg(v0)
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLGE)
+               v.AddArg3(y, x, cond)
                return true
        }
-       // match: (BitLen16 <t> x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (NEGQ (ADDQconst <t> [-32] (LZCNTL (MOVWQZX <x.Type> x))))
+       // match: (CondSelect <t> x y (SETA cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLHI y x cond)
        for {
                t := v.Type
                x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
+               y := v_1
+               if v_2.Op != OpAMD64SETA {
                        break
                }
-               v.reset(OpAMD64NEGQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t)
-               v0.AuxInt = int32ToAuxInt(-32)
-               v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, x.Type)
-               v2.AddArg(x)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpBitLen32(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (BitLen32 x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (Select0 (BSRQ (LEAQ1 <typ.UInt64> [1] (MOVLQZX <typ.UInt64> x) (MOVLQZX <typ.UInt64> x))))
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
                        break
                }
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v1 := b.NewValue0(v.Pos, OpAMD64LEAQ1, typ.UInt64)
-               v1.AuxInt = int32ToAuxInt(1)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64)
-               v2.AddArg(x)
-               v1.AddArg2(v2, v2)
-               v0.AddArg(v1)
-               v.AddArg(v0)
+               v.reset(OpAMD64CMOVLHI)
+               v.AddArg3(y, x, cond)
                return true
        }
-       // match: (BitLen32 <t> x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (NEGQ (ADDQconst <t> [-32] (LZCNTL x)))
+       // match: (CondSelect <t> x y (SETB cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLCS y x cond)
        for {
                t := v.Type
                x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
+               y := v_1
+               if v_2.Op != OpAMD64SETB {
                        break
                }
-               v.reset(OpAMD64NEGQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t)
-               v0.AuxInt = int32ToAuxInt(-32)
-               v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpBitLen64(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (BitLen64 <t> x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (ADDQconst [1] (CMOVQEQ <t> (Select0 <t> (BSRQ x)) (MOVQconst <t> [-1]) (Select1 <types.TypeFlags> (BSRQ x))))
-       for {
-               t := v.Type
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
-                       break
-               }
-               v.reset(OpAMD64ADDQconst)
-               v.AuxInt = int32ToAuxInt(1)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMOVQEQ, t)
-               v1 := b.NewValue0(v.Pos, OpSelect0, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64BSRQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v2.AddArg(x)
-               v1.AddArg(v2)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t)
-               v3.AuxInt = int64ToAuxInt(-1)
-               v4 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v4.AddArg(v2)
-               v0.AddArg3(v1, v3, v4)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (BitLen64 <t> x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (NEGQ (ADDQconst <t> [-64] (LZCNTQ x)))
-       for {
-               t := v.Type
-               x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
-                       break
-               }
-               v.reset(OpAMD64NEGQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t)
-               v0.AuxInt = int32ToAuxInt(-64)
-               v1 := b.NewValue0(v.Pos, OpAMD64LZCNTQ, typ.UInt64)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpBitLen8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (BitLen8 x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (BSRL (LEAL1 <typ.UInt32> [1] (MOVBQZX <typ.UInt32> x) (MOVBQZX <typ.UInt32> x)))
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
-                       break
-               }
-               v.reset(OpAMD64BSRL)
-               v0 := b.NewValue0(v.Pos, OpAMD64LEAL1, typ.UInt32)
-               v0.AuxInt = int32ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt32)
-               v1.AddArg(x)
-               v0.AddArg2(v1, v1)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (BitLen8 <t> x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (NEGQ (ADDQconst <t> [-32] (LZCNTL (MOVBQZX <x.Type> x))))
-       for {
-               t := v.Type
-               x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
                        break
                }
-               v.reset(OpAMD64NEGQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64ADDQconst, t)
-               v0.AuxInt = int32ToAuxInt(-32)
-               v1 := b.NewValue0(v.Pos, OpAMD64LZCNTL, typ.UInt32)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, x.Type)
-               v2.AddArg(x)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpBroadcast128MaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedFloat32x4 x mask)
-       // result: (VBROADCASTSSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VBROADCASTSSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedFloat64x2 x mask)
-       // result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedInt16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedInt16x8 x mask)
-       // result: (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedInt32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedInt32x4 x mask)
-       // result: (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedInt64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedInt64x2 x mask)
-       // result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedInt8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedInt8x16 x mask)
-       // result: (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedUint16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedUint16x8 x mask)
-       // result: (VPBROADCASTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedUint32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedUint32x4 x mask)
-       // result: (VPBROADCASTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedUint64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedUint64x2 x mask)
-       // result: (VPBROADCASTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast128MaskedUint8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast128MaskedUint8x16 x mask)
-       // result: (VPBROADCASTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedFloat32x4 x mask)
-       // result: (VBROADCASTSSMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VBROADCASTSSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedFloat64x2 x mask)
-       // result: (VBROADCASTSDMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VBROADCASTSDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedInt16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedInt16x8 x mask)
-       // result: (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedInt32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedInt32x4 x mask)
-       // result: (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedInt64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedInt64x2 x mask)
-       // result: (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedInt8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedInt8x16 x mask)
-       // result: (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedUint16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedUint16x8 x mask)
-       // result: (VPBROADCASTWMasked256 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedUint32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedUint32x4 x mask)
-       // result: (VPBROADCASTDMasked256 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedUint64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedUint64x2 x mask)
-       // result: (VPBROADCASTQMasked256 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast256MaskedUint8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast256MaskedUint8x16 x mask)
-       // result: (VPBROADCASTBMasked256 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedFloat32x4 x mask)
-       // result: (VBROADCASTSSMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VBROADCASTSSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedFloat64x2 x mask)
-       // result: (VBROADCASTSDMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VBROADCASTSDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedInt16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedInt16x8 x mask)
-       // result: (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedInt32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedInt32x4 x mask)
-       // result: (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedInt64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedInt64x2 x mask)
-       // result: (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedInt8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedInt8x16 x mask)
-       // result: (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedUint16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedUint16x8 x mask)
-       // result: (VPBROADCASTWMasked512 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedUint32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedUint32x4 x mask)
-       // result: (VPBROADCASTDMasked512 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedUint64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedUint64x2 x mask)
-       // result: (VPBROADCASTQMasked512 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBroadcast512MaskedUint8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Broadcast512MaskedUint8x16 x mask)
-       // result: (VPBROADCASTBMasked512 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPBROADCASTBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpBswap16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (Bswap16 x)
-       // result: (ROLWconst [8] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64ROLWconst)
-               v.AuxInt = int8ToAuxInt(8)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeil(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (Ceil x)
-       // result: (ROUNDSD [2] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64ROUNDSD)
-               v.AuxInt = int8ToAuxInt(2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilFloat32x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilFloat32x4 x)
-       // result: (VROUNDPS128 [2] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VROUNDPS128)
-               v.AuxInt = uint8ToAuxInt(2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilFloat32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilFloat32x8 x)
-       // result: (VROUNDPS256 [2] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VROUNDPS256)
-               v.AuxInt = uint8ToAuxInt(2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilFloat64x2(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilFloat64x2 x)
-       // result: (VROUNDPD128 [2] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VROUNDPD128)
-               v.AuxInt = uint8ToAuxInt(2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilFloat64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilFloat64x4 x)
-       // result: (VROUNDPD256 [2] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VROUNDPD256)
-               v.AuxInt = uint8ToAuxInt(2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledFloat32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledFloat32x16 [a] x)
-       // result: (VRNDSCALEPS512 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPS512)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledFloat32x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledFloat32x4 [a] x)
-       // result: (VRNDSCALEPS128 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPS128)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledFloat32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledFloat32x8 [a] x)
-       // result: (VRNDSCALEPS256 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPS256)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledFloat64x2(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledFloat64x2 [a] x)
-       // result: (VRNDSCALEPD128 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPD128)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledFloat64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledFloat64x4 [a] x)
-       // result: (VRNDSCALEPD256 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPD256)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledFloat64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledFloat64x8 [a] x)
-       // result: (VRNDSCALEPD512 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPD512)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledMaskedFloat32x16 [a] x mask)
-       // result: (VRNDSCALEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledMaskedFloat32x4 [a] x mask)
-       // result: (VRNDSCALEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledMaskedFloat32x8 [a] x mask)
-       // result: (VRNDSCALEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledMaskedFloat64x2 [a] x mask)
-       // result: (VRNDSCALEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledMaskedFloat64x4 [a] x mask)
-       // result: (VRNDSCALEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledMaskedFloat64x8 [a] x mask)
-       // result: (VRNDSCALEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueFloat32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledResidueFloat32x16 [a] x)
-       // result: (VREDUCEPS512 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPS512)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueFloat32x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledResidueFloat32x4 [a] x)
-       // result: (VREDUCEPS128 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPS128)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueFloat32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledResidueFloat32x8 [a] x)
-       // result: (VREDUCEPS256 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPS256)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueFloat64x2(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledResidueFloat64x2 [a] x)
-       // result: (VREDUCEPD128 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPD128)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueFloat64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledResidueFloat64x4 [a] x)
-       // result: (VREDUCEPD256 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPD256)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueFloat64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (CeilScaledResidueFloat64x8 [a] x)
-       // result: (VREDUCEPD512 [a+2] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPD512)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledResidueMaskedFloat32x16 [a] x mask)
-       // result: (VREDUCEPSMasked512 [a+2] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledResidueMaskedFloat32x4 [a] x mask)
-       // result: (VREDUCEPSMasked128 [a+2] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledResidueMaskedFloat32x8 [a] x mask)
-       // result: (VREDUCEPSMasked256 [a+2] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledResidueMaskedFloat64x2 [a] x mask)
-       // result: (VREDUCEPDMasked128 [a+2] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledResidueMaskedFloat64x4 [a] x mask)
-       // result: (VREDUCEPDMasked256 [a+2] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCeilScaledResidueMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CeilScaledResidueMaskedFloat64x8 [a] x mask)
-       // result: (VREDUCEPDMasked512 [a+2] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressFloat32x16 x mask)
-       // result: (VCOMPRESSPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCOMPRESSPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressFloat32x4 x mask)
-       // result: (VCOMPRESSPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCOMPRESSPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressFloat32x8 x mask)
-       // result: (VCOMPRESSPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCOMPRESSPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressFloat64x2 x mask)
-       // result: (VCOMPRESSPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCOMPRESSPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressFloat64x4 x mask)
-       // result: (VCOMPRESSPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCOMPRESSPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressFloat64x8 x mask)
-       // result: (VCOMPRESSPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCOMPRESSPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt16x16 x mask)
-       // result: (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt16x32 x mask)
-       // result: (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt16x8 x mask)
-       // result: (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt32x16 x mask)
-       // result: (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt32x4 x mask)
-       // result: (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt32x8 x mask)
-       // result: (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt64x2 x mask)
-       // result: (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt64x4 x mask)
-       // result: (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt64x8 x mask)
-       // result: (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt8x16 x mask)
-       // result: (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt8x32 x mask)
-       // result: (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressInt8x64 x mask)
-       // result: (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint16x16 x mask)
-       // result: (VPCOMPRESSWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint16x32 x mask)
-       // result: (VPCOMPRESSWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint16x8 x mask)
-       // result: (VPCOMPRESSWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint32x16 x mask)
-       // result: (VPCOMPRESSDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint32x4 x mask)
-       // result: (VPCOMPRESSDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint32x8 x mask)
-       // result: (VPCOMPRESSDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint64x2 x mask)
-       // result: (VPCOMPRESSQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint64x4 x mask)
-       // result: (VPCOMPRESSQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint64x8 x mask)
-       // result: (VPCOMPRESSQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint8x16 x mask)
-       // result: (VPCOMPRESSBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint8x32 x mask)
-       // result: (VPCOMPRESSBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCompressUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CompressUint8x64 x mask)
-       // result: (VPCOMPRESSBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPCOMPRESSBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCondSelect(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (CondSelect <t> x y (SETEQ cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQEQ y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETEQ {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQEQ)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETNE cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQNE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETNE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQNE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETL cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQLT y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETL {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQLT)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETG cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQGT y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETG {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQGT)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETLE cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQLE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETLE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQLE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGE cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQGE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQGE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETA cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQHI y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETA {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQHI)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETB cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQCS y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETB {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQCS)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETAE cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQCC y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETAE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQCC)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETBE cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQLS y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETBE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQLS)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETEQF cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQEQF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETEQF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQEQF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETNEF cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQNEF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETNEF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQNEF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGF cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQGTF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQGTF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGEF cond))
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQGEF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGEF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQGEF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETEQ cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLEQ y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETEQ {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLEQ)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETNE cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLNE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETNE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLNE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETL cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLLT y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETL {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLLT)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETG cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLGT y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETG {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLGT)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETLE cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLLE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETLE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLLE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGE cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLGE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLGE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETA cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLHI y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETA {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLHI)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETB cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLCS y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETB {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLCS)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETAE cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLCC y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETAE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLCC)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETBE cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLLS y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETBE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLLS)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETEQF cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLEQF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETEQF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLEQF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETNEF cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLNEF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETNEF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLNEF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGF cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLGTF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLGTF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGEF cond))
-       // cond: is32BitInt(t)
-       // result: (CMOVLGEF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGEF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLGEF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETEQ cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWEQ y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETEQ {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWEQ)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETNE cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWNE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETNE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWNE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETL cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWLT y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETL {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWLT)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETG cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWGT y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETG {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWGT)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETLE cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWLE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETLE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWLE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGE cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWGE y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWGE)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETA cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWHI y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETA {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWHI)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETB cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWCS y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETB {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWCS)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETAE cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWCC y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETAE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWCC)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETBE cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWLS y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETBE {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWLS)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETEQF cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWEQF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETEQF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWEQF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETNEF cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWNEF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETNEF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWNEF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGF cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWGTF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWGTF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y (SETGEF cond))
-       // cond: is16BitInt(t)
-       // result: (CMOVWGEF y x cond)
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if v_2.Op != OpAMD64SETGEF {
-                       break
-               }
-               cond := v_2.Args[0]
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWGEF)
-               v.AddArg3(y, x, cond)
-               return true
-       }
-       // match: (CondSelect <t> x y check)
-       // cond: !check.Type.IsFlags() && check.Type.Size() == 1
-       // result: (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               check := v_2
-               if !(!check.Type.IsFlags() && check.Type.Size() == 1) {
-                       break
-               }
-               v.reset(OpCondSelect)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt64)
-               v0.AddArg(check)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-       // match: (CondSelect <t> x y check)
-       // cond: !check.Type.IsFlags() && check.Type.Size() == 2
-       // result: (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               check := v_2
-               if !(!check.Type.IsFlags() && check.Type.Size() == 2) {
-                       break
-               }
-               v.reset(OpCondSelect)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt64)
-               v0.AddArg(check)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-       // match: (CondSelect <t> x y check)
-       // cond: !check.Type.IsFlags() && check.Type.Size() == 4
-       // result: (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               check := v_2
-               if !(!check.Type.IsFlags() && check.Type.Size() == 4) {
-                       break
-               }
-               v.reset(OpCondSelect)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64)
-               v0.AddArg(check)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-       // match: (CondSelect <t> x y check)
-       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
-       // result: (CMOVQNE y x (CMPQconst [0] check))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               check := v_2
-               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v0.AuxInt = int32ToAuxInt(0)
-               v0.AddArg(check)
-               v.AddArg3(y, x, v0)
-               return true
-       }
-       // match: (CondSelect <t> x y check)
-       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
-       // result: (CMOVLNE y x (CMPQconst [0] check))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               check := v_2
-               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVLNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v0.AuxInt = int32ToAuxInt(0)
-               v0.AddArg(check)
-               v.AddArg3(y, x, v0)
-               return true
-       }
-       // match: (CondSelect <t> x y check)
-       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
-       // result: (CMOVWNE y x (CMPQconst [0] check))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               check := v_2
-               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64CMOVWNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v0.AuxInt = int32ToAuxInt(0)
-               v0.AddArg(check)
-               v.AddArg3(y, x, v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpConst16(v *Value) bool {
-       // match: (Const16 [c])
-       // result: (MOVLconst [int32(c)])
-       for {
-               c := auxIntToInt16(v.AuxInt)
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               return true
-       }
-}
-func rewriteValueAMD64_OpConst8(v *Value) bool {
-       // match: (Const8 [c])
-       // result: (MOVLconst [int32(c)])
-       for {
-               c := auxIntToInt8(v.AuxInt)
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(int32(c))
-               return true
-       }
-}
-func rewriteValueAMD64_OpConstBool(v *Value) bool {
-       // match: (ConstBool [c])
-       // result: (MOVLconst [b2i32(c)])
-       for {
-               c := auxIntToBool(v.AuxInt)
-               v.reset(OpAMD64MOVLconst)
-               v.AuxInt = int32ToAuxInt(b2i32(c))
-               return true
-       }
-}
-func rewriteValueAMD64_OpConstNil(v *Value) bool {
-       // match: (ConstNil )
-       // result: (MOVQconst [0])
-       for {
-               v.reset(OpAMD64MOVQconst)
-               v.AuxInt = int64ToAuxInt(0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpConvertToInt32MaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ConvertToInt32MaskedFloat32x16 x mask)
-       // result: (VCVTTPS2DQMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCVTTPS2DQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpConvertToInt32MaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ConvertToInt32MaskedFloat32x4 x mask)
-       // result: (VCVTTPS2DQMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCVTTPS2DQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpConvertToInt32MaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ConvertToInt32MaskedFloat32x8 x mask)
-       // result: (VCVTTPS2DQMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCVTTPS2DQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpConvertToUint32MaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ConvertToUint32MaskedFloat32x16 x mask)
-       // result: (VCVTPS2UDQMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCVTPS2UDQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpConvertToUint32MaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ConvertToUint32MaskedFloat32x4 x mask)
-       // result: (VCVTPS2UDQMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCVTPS2UDQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpConvertToUint32MaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ConvertToUint32MaskedFloat32x8 x mask)
-       // result: (VCVTPS2UDQMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VCVTPS2UDQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCtz16(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Ctz16 x)
-       // result: (BSFL (ORLconst <typ.UInt32> [1<<16] x))
-       for {
-               x := v_0
-               v.reset(OpAMD64BSFL)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32)
-               v0.AuxInt = int32ToAuxInt(1 << 16)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCtz16NonZero(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (Ctz16NonZero x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (TZCNTL x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
-                       break
-               }
-               v.reset(OpAMD64TZCNTL)
-               v.AddArg(x)
-               return true
-       }
-       // match: (Ctz16NonZero x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (BSFL x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
-                       break
-               }
-               v.reset(OpAMD64BSFL)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpCtz32(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Ctz32 x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (TZCNTL x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
-                       break
-               }
-               v.reset(OpAMD64TZCNTL)
-               v.AddArg(x)
-               return true
-       }
-       // match: (Ctz32 x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (Select0 (BSFQ (BTSQconst <typ.UInt64> [32] x)))
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
-                       break
-               }
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v1 := b.NewValue0(v.Pos, OpAMD64BTSQconst, typ.UInt64)
-               v1.AuxInt = int8ToAuxInt(32)
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpCtz32NonZero(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (Ctz32NonZero x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (TZCNTL x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
-                       break
-               }
-               v.reset(OpAMD64TZCNTL)
-               v.AddArg(x)
-               return true
-       }
-       // match: (Ctz32NonZero x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (BSFL x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
-                       break
-               }
-               v.reset(OpAMD64BSFL)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpCtz64(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Ctz64 x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (TZCNTQ x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
-                       break
-               }
-               v.reset(OpAMD64TZCNTQ)
-               v.AddArg(x)
-               return true
-       }
-       // match: (Ctz64 <t> x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <types.TypeFlags> (BSFQ x)))
-       for {
-               t := v.Type
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
-                       break
-               }
-               v.reset(OpAMD64CMOVQEQ)
-               v0 := b.NewValue0(v.Pos, OpSelect0, t)
-               v1 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v1.AddArg(x)
-               v0.AddArg(v1)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t)
-               v2.AuxInt = int64ToAuxInt(64)
-               v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v3.AddArg(v1)
-               v.AddArg3(v0, v2, v3)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpCtz64NonZero(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Ctz64NonZero x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (TZCNTQ x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
-                       break
-               }
-               v.reset(OpAMD64TZCNTQ)
-               v.AddArg(x)
-               return true
-       }
-       // match: (Ctz64NonZero x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (Select0 (BSFQ x))
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
-                       break
-               }
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpCtz8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Ctz8 x)
-       // result: (BSFL (ORLconst <typ.UInt32> [1<<8 ] x))
-       for {
-               x := v_0
-               v.reset(OpAMD64BSFL)
-               v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32)
-               v0.AuxInt = int32ToAuxInt(1 << 8)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCtz8NonZero(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (Ctz8NonZero x)
-       // cond: buildcfg.GOAMD64 >= 3
-       // result: (TZCNTL x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 >= 3) {
-                       break
-               }
-               v.reset(OpAMD64TZCNTL)
-               v.AddArg(x)
-               return true
-       }
-       // match: (Ctz8NonZero x)
-       // cond: buildcfg.GOAMD64 < 3
-       // result: (BSFL x)
-       for {
-               x := v_0
-               if !(buildcfg.GOAMD64 < 3) {
-                       break
-               }
-               v.reset(OpAMD64BSFL)
-               v.AddArg(x)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpCvt16toMask16x16(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt16toMask16x16 <t> x)
-       // result: (VPMOVMToVec16x16 <types.TypeVec256> (KMOVWk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v.Type = types.TypeVec256
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt16toMask32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt16toMask32x16 <t> x)
-       // result: (VPMOVMToVec32x16 <types.TypeVec512> (KMOVWk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v.Type = types.TypeVec512
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt16toMask8x16(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt16toMask8x16 <t> x)
-       // result: (VPMOVMToVec8x16 <types.TypeVec128> (KMOVWk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v.Type = types.TypeVec128
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt32toMask16x32(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt32toMask16x32 <t> x)
-       // result: (VPMOVMToVec16x32 <types.TypeVec512> (KMOVDk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v.Type = types.TypeVec512
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVDk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt32toMask8x32(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt32toMask8x32 <t> x)
-       // result: (VPMOVMToVec8x32 <types.TypeVec256> (KMOVDk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v.Type = types.TypeVec256
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVDk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt64toMask8x64(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt64toMask8x64 <t> x)
-       // result: (VPMOVMToVec8x64 <types.TypeVec512> (KMOVQk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v.Type = types.TypeVec512
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt8toMask16x8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt8toMask16x8 <t> x)
-       // result: (VPMOVMToVec16x8 <types.TypeVec128> (KMOVBk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v.Type = types.TypeVec128
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt8toMask32x4(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt8toMask32x4 <t> x)
-       // result: (VPMOVMToVec32x4 <types.TypeVec128> (KMOVBk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v.Type = types.TypeVec128
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt8toMask32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt8toMask32x8 <t> x)
-       // result: (VPMOVMToVec32x8 <types.TypeVec256> (KMOVBk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v.Type = types.TypeVec256
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt8toMask64x2(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt8toMask64x2 <t> x)
-       // result: (VPMOVMToVec64x2 <types.TypeVec128> (KMOVBk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v.Type = types.TypeVec128
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt8toMask64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt8toMask64x4 <t> x)
-       // result: (VPMOVMToVec64x4 <types.TypeVec256> (KMOVBk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v.Type = types.TypeVec256
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvt8toMask64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Cvt8toMask64x8 <t> x)
-       // result: (VPMOVMToVec64x8 <types.TypeVec512> (KMOVBk <t> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v.Type = types.TypeVec512
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask16x16to16(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask16x16to16 <t> x)
-       // result: (KMOVWi <t> (VPMOVVec16x16ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVWi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask16x32to32(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask16x32to32 <t> x)
-       // result: (KMOVDi <t> (VPMOVVec16x32ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVDi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask16x8to8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask16x8to8 <t> x)
-       // result: (KMOVBi <t> (VPMOVVec16x8ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVBi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask32x16to16(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask32x16to16 <t> x)
-       // result: (KMOVWi <t> (VPMOVVec32x16ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVWi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask32x4to8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask32x4to8 <t> x)
-       // result: (KMOVBi <t> (VPMOVVec32x4ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVBi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask32x8to8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask32x8to8 <t> x)
-       // result: (KMOVBi <t> (VPMOVVec32x8ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVBi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask64x2to8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask64x2to8 <t> x)
-       // result: (KMOVBi <t> (VPMOVVec64x2ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVBi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask64x4to8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask64x4to8 <t> x)
-       // result: (KMOVBi <t> (VPMOVVec64x4ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVBi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask64x8to8(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask64x8to8 <t> x)
-       // result: (KMOVBi <t> (VPMOVVec64x8ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVBi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask8x16to16(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask8x16to16 <t> x)
-       // result: (KMOVWi <t> (VPMOVVec8x16ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVWi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask8x32to32(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask8x32to32 <t> x)
-       // result: (KMOVDi <t> (VPMOVVec8x32ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVDi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpCvtMask8x64to64(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (CvtMask8x64to64 <t> x)
-       // result: (KMOVQi <t> (VPMOVVec8x64ToM <types.TypeMask> x))
-       for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64KMOVQi)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDiv16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Div16 [a] x y)
-       // result: (Select0 (DIVW [a] x y))
-       for {
-               a := auxIntToBool(v.AuxInt)
-               x := v_0
-               y := v_1
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16))
-               v0.AuxInt = boolToAuxInt(a)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDiv16u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Div16u x y)
-       // result: (Select0 (DIVWU x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16))
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDiv32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Div32 [a] x y)
-       // result: (Select0 (DIVL [a] x y))
-       for {
-               a := auxIntToBool(v.AuxInt)
-               x := v_0
-               y := v_1
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVL, types.NewTuple(typ.Int32, typ.Int32))
-               v0.AuxInt = boolToAuxInt(a)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDiv32u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Div32u x y)
-       // result: (Select0 (DIVLU x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVLU, types.NewTuple(typ.UInt32, typ.UInt32))
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDiv64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Div64 [a] x y)
-       // result: (Select0 (DIVQ [a] x y))
-       for {
-               a := auxIntToBool(v.AuxInt)
-               x := v_0
-               y := v_1
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVQ, types.NewTuple(typ.Int64, typ.Int64))
-               v0.AuxInt = boolToAuxInt(a)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDiv64u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Div64u x y)
-       // result: (Select0 (DIVQU x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVQU, types.NewTuple(typ.UInt64, typ.UInt64))
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDiv8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Div8 x y)
-       // result: (Select0 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16))
-               v1 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDiv8u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Div8u x y)
-       // result: (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect0)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16))
-               v1 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDivMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DivMaskedFloat32x16 x y mask)
-       // result: (VDIVPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VDIVPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDivMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DivMaskedFloat32x4 x y mask)
-       // result: (VDIVPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VDIVPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDivMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DivMaskedFloat32x8 x y mask)
-       // result: (VDIVPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VDIVPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDivMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DivMaskedFloat64x2 x y mask)
-       // result: (VDIVPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VDIVPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDivMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DivMaskedFloat64x4 x y mask)
-       // result: (VDIVPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VDIVPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDivMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DivMaskedFloat64x8 x y mask)
-       // result: (VDIVPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VDIVPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDotProdPairsMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DotProdPairsMaskedInt16x16 x y mask)
-       // result: (VPMADDWDMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMADDWDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDotProdPairsMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DotProdPairsMaskedInt16x32 x y mask)
-       // result: (VPMADDWDMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMADDWDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDotProdPairsMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DotProdPairsMaskedInt16x8 x y mask)
-       // result: (VPMADDWDMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMADDWDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DotProdPairsSaturatedMaskedUint8x16 x y mask)
-       // result: (VPMADDUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMADDUBSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DotProdPairsSaturatedMaskedUint8x32 x y mask)
-       // result: (VPMADDUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMADDUBSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpDotProdPairsSaturatedMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (DotProdPairsSaturatedMaskedUint8x64 x y mask)
-       // result: (VPMADDUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMADDUBSWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEq16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Eq16 x y)
-       // result: (SETEQ (CMPW x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETEQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEq32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Eq32 x y)
-       // result: (SETEQ (CMPL x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETEQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEq32F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Eq32F x y)
-       // result: (SETEQF (UCOMISS x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETEQF)
-               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEq64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Eq64 x y)
-       // result: (SETEQ (CMPQ x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETEQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEq64F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Eq64F x y)
-       // result: (SETEQF (UCOMISD x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETEQF)
-               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEq8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Eq8 x y)
-       // result: (SETEQ (CMPB x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETEQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqB(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (EqB x y)
-       // result: (SETEQ (CMPB x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETEQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqPtr(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (EqPtr x y)
-       // result: (SETEQ (CMPQ x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETEQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualFloat32x16 x y)
-       // result: (VPMOVMToVec32x16 (VCMPPS512 [0] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (EqualFloat32x4 x y)
-       // result: (VCMPPS128 [0] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (EqualFloat32x8 x y)
-       // result: (VCMPPS256 [0] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (EqualFloat64x2 x y)
-       // result: (VCMPPD128 [0] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (EqualFloat64x4 x y)
-       // result: (VCMPPD256 [0] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualFloat64x8 x y)
-       // result: (VPMOVMToVec64x8 (VCMPPD512 [0] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualInt16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPEQW512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQW512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualInt32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPEQD512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQD512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualInt64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQ512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualInt8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPEQB512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQB512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedFloat32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [0] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedFloat32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedFloat32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedFloat64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [0] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedFloat64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [0] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedFloat64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [0] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [0] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [0] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [0] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [0] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [0] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [0] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [0] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [0] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [0] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [0] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [0] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedInt8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [0] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [0] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [0] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [0] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [0] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [0] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [0] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [0] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [0] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [0] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [0] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [0] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualMaskedUint8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [0] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualUint16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPEQW512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQW512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualUint32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPEQD512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQD512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualUint64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQ512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualUint8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPEQB512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQB512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandFloat32x16 x mask)
-       // result: (VEXPANDPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VEXPANDPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandFloat32x4 x mask)
-       // result: (VEXPANDPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VEXPANDPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandFloat32x8 x mask)
-       // result: (VEXPANDPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VEXPANDPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandFloat64x2 x mask)
-       // result: (VEXPANDPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VEXPANDPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandFloat64x4 x mask)
-       // result: (VEXPANDPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VEXPANDPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandFloat64x8 x mask)
-       // result: (VEXPANDPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VEXPANDPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt16x16 x mask)
-       // result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt16x32 x mask)
-       // result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt16x8 x mask)
-       // result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt32x16 x mask)
-       // result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt32x4 x mask)
-       // result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt32x8 x mask)
-       // result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt64x2 x mask)
-       // result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt64x4 x mask)
-       // result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt64x8 x mask)
-       // result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt8x16 x mask)
-       // result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt8x32 x mask)
-       // result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandInt8x64 x mask)
-       // result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint16x16 x mask)
-       // result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint16x32 x mask)
-       // result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint16x8 x mask)
-       // result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint32x16 x mask)
-       // result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint32x4 x mask)
-       // result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint32x8 x mask)
-       // result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint64x2 x mask)
-       // result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint64x4 x mask)
-       // result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint64x8 x mask)
-       // result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint8x16 x mask)
-       // result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint8x32 x mask)
-       // result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpExpandUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ExpandUint8x64 x mask)
-       // result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPEXPANDBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFMA(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (FMA x y z)
-       // result: (VFMADD231SD z x y)
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               v.reset(OpAMD64VFMADD231SD)
-               v.AddArg3(z, x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloor(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (Floor x)
-       // result: (ROUNDSD [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64ROUNDSD)
-               v.AuxInt = int8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorFloat32x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorFloat32x4 x)
-       // result: (VROUNDPS128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VROUNDPS128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorFloat32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorFloat32x8 x)
-       // result: (VROUNDPS256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VROUNDPS256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorFloat64x2(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorFloat64x2 x)
-       // result: (VROUNDPD128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VROUNDPD128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorFloat64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorFloat64x4 x)
-       // result: (VROUNDPD256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VROUNDPD256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledFloat32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledFloat32x16 [a] x)
-       // result: (VRNDSCALEPS512 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPS512)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledFloat32x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledFloat32x4 [a] x)
-       // result: (VRNDSCALEPS128 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPS128)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledFloat32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledFloat32x8 [a] x)
-       // result: (VRNDSCALEPS256 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPS256)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledFloat64x2(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledFloat64x2 [a] x)
-       // result: (VRNDSCALEPD128 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPD128)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledFloat64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledFloat64x4 [a] x)
-       // result: (VRNDSCALEPD256 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPD256)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledFloat64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledFloat64x8 [a] x)
-       // result: (VRNDSCALEPD512 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VRNDSCALEPD512)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledMaskedFloat32x16 [a] x mask)
-       // result: (VRNDSCALEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledMaskedFloat32x4 [a] x mask)
-       // result: (VRNDSCALEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledMaskedFloat32x8 [a] x mask)
-       // result: (VRNDSCALEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledMaskedFloat64x2 [a] x mask)
-       // result: (VRNDSCALEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledMaskedFloat64x4 [a] x mask)
-       // result: (VRNDSCALEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledMaskedFloat64x8 [a] x mask)
-       // result: (VRNDSCALEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueFloat32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledResidueFloat32x16 [a] x)
-       // result: (VREDUCEPS512 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPS512)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueFloat32x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledResidueFloat32x4 [a] x)
-       // result: (VREDUCEPS128 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPS128)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueFloat32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledResidueFloat32x8 [a] x)
-       // result: (VREDUCEPS256 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPS256)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueFloat64x2(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledResidueFloat64x2 [a] x)
-       // result: (VREDUCEPD128 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPD128)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueFloat64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledResidueFloat64x4 [a] x)
-       // result: (VREDUCEPD256 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPD256)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueFloat64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (FloorScaledResidueFloat64x8 [a] x)
-       // result: (VREDUCEPD512 [a+1] x)
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               v.reset(OpAMD64VREDUCEPD512)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledResidueMaskedFloat32x16 [a] x mask)
-       // result: (VREDUCEPSMasked512 [a+1] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledResidueMaskedFloat32x4 [a] x mask)
-       // result: (VREDUCEPSMasked128 [a+1] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledResidueMaskedFloat32x8 [a] x mask)
-       // result: (VREDUCEPSMasked256 [a+1] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledResidueMaskedFloat64x2 [a] x mask)
-       // result: (VREDUCEPDMasked128 [a+1] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledResidueMaskedFloat64x4 [a] x mask)
-       // result: (VREDUCEPDMasked256 [a+1] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpFloorScaledResidueMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (FloorScaledResidueMaskedFloat64x8 [a] x mask)
-       // result: (VREDUCEPDMasked512 [a+1] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 1)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldAffineTransformInverseMaskedUint8x16 [a] x y mask)
-       // result: (VGF2P8AFFINEINVQBMasked128 [a] x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8AFFINEINVQBMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldAffineTransformInverseMaskedUint8x32 [a] x y mask)
-       // result: (VGF2P8AFFINEINVQBMasked256 [a] x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8AFFINEINVQBMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldAffineTransformInverseMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldAffineTransformInverseMaskedUint8x64 [a] x y mask)
-       // result: (VGF2P8AFFINEINVQBMasked512 [a] x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8AFFINEINVQBMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldAffineTransformMaskedUint8x16 [a] x y mask)
-       // result: (VGF2P8AFFINEQBMasked128 [a] x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8AFFINEQBMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldAffineTransformMaskedUint8x32 [a] x y mask)
-       // result: (VGF2P8AFFINEQBMasked256 [a] x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8AFFINEQBMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldAffineTransformMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldAffineTransformMaskedUint8x64 [a] x y mask)
-       // result: (VGF2P8AFFINEQBMasked512 [a] x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8AFFINEQBMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldMulMaskedUint8x16 x y mask)
-       // result: (VGF2P8MULBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8MULBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldMulMaskedUint8x32 x y mask)
-       // result: (VGF2P8MULBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8MULBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGaloisFieldMulMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (GaloisFieldMulMaskedUint8x64 x y mask)
-       // result: (VGF2P8MULBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VGF2P8MULBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetG(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetG mem)
-       // cond: v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal
-       // result: (LoweredGetG mem)
-       for {
-               mem := v_0
-               if !(v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal) {
-                       break
-               }
-               v.reset(OpAMD64LoweredGetG)
-               v.AddArg(mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpGetHiFloat32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiFloat32x16 x)
-       // result: (VEXTRACTF64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTF64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiFloat32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiFloat32x8 x)
-       // result: (VEXTRACTF128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTF128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiFloat64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiFloat64x4 x)
-       // result: (VEXTRACTF128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTF128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiFloat64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiFloat64x8 x)
-       // result: (VEXTRACTF64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTF64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiInt16x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiInt16x16 x)
-       // result: (VEXTRACTI128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiInt16x32(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiInt16x32 x)
-       // result: (VEXTRACTI64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiInt32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiInt32x16 x)
-       // result: (VEXTRACTI64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiInt32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiInt32x8 x)
-       // result: (VEXTRACTI128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiInt64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiInt64x4 x)
-       // result: (VEXTRACTI128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiInt64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiInt64x8 x)
-       // result: (VEXTRACTI64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiInt8x32(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiInt8x32 x)
-       // result: (VEXTRACTI128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiInt8x64(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiInt8x64 x)
-       // result: (VEXTRACTI64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiUint16x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiUint16x16 x)
-       // result: (VEXTRACTI128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiUint16x32(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiUint16x32 x)
-       // result: (VEXTRACTI64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiUint32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiUint32x16 x)
-       // result: (VEXTRACTI64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiUint32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiUint32x8 x)
-       // result: (VEXTRACTI128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiUint64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiUint64x4 x)
-       // result: (VEXTRACTI128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiUint64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiUint64x8 x)
-       // result: (VEXTRACTI64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiUint8x32(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiUint8x32 x)
-       // result: (VEXTRACTI128128 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetHiUint8x64(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetHiUint8x64 x)
-       // result: (VEXTRACTI64X4256 [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoFloat32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoFloat32x16 x)
-       // result: (VEXTRACTF64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTF64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoFloat32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoFloat32x8 x)
-       // result: (VEXTRACTF128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTF128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoFloat64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoFloat64x4 x)
-       // result: (VEXTRACTF128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTF128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoFloat64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoFloat64x8 x)
-       // result: (VEXTRACTF64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTF64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoInt16x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoInt16x16 x)
-       // result: (VEXTRACTI128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoInt16x32(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoInt16x32 x)
-       // result: (VEXTRACTI64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoInt32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoInt32x16 x)
-       // result: (VEXTRACTI64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoInt32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoInt32x8 x)
-       // result: (VEXTRACTI128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoInt64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoInt64x4 x)
-       // result: (VEXTRACTI128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoInt64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoInt64x8 x)
-       // result: (VEXTRACTI64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoInt8x32(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoInt8x32 x)
-       // result: (VEXTRACTI128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoInt8x64(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoInt8x64 x)
-       // result: (VEXTRACTI64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoUint16x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoUint16x16 x)
-       // result: (VEXTRACTI128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoUint16x32(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoUint16x32 x)
-       // result: (VEXTRACTI64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoUint32x16(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoUint32x16 x)
-       // result: (VEXTRACTI64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoUint32x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoUint32x8 x)
-       // result: (VEXTRACTI128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoUint64x4(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoUint64x4 x)
-       // result: (VEXTRACTI128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoUint64x8(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoUint64x8 x)
-       // result: (VEXTRACTI64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoUint8x32(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoUint8x32 x)
-       // result: (VEXTRACTI128128 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI128128)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGetLoUint8x64(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (GetLoUint8x64 x)
-       // result: (VEXTRACTI64X4256 [0] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64VEXTRACTI64X4256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualFloat32x16 x y)
-       // result: (VPMOVMToVec32x16 (VCMPPS512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (GreaterEqualFloat32x4 x y)
-       // result: (VCMPPS128 [13] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS128)
-               v.AuxInt = uint8ToAuxInt(13)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (GreaterEqualFloat32x8 x y)
-       // result: (VCMPPS256 [13] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS256)
-               v.AuxInt = uint8ToAuxInt(13)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (GreaterEqualFloat64x2 x y)
-       // result: (VCMPPD128 [13] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD128)
-               v.AuxInt = uint8ToAuxInt(13)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (GreaterEqualFloat64x4 x y)
-       // result: (VCMPPD256 [13] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD256)
-               v.AuxInt = uint8ToAuxInt(13)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualFloat64x8 x y)
-       // result: (VPMOVMToVec64x8 (VCMPPD512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualInt16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPW512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualInt32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPD512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualInt64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPQ512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualInt8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPB512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedFloat32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [13] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedFloat32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [13] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedFloat32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [13] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedFloat64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [13] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedFloat64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [13] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedFloat64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [13] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [13] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [13] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [13] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [13] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [13] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [13] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [13] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [13] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [13] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [13] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [13] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedInt8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [13] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [13] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [13] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [13] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [13] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [13] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [13] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [13] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [13] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [13] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [13] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [13] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualMaskedUint8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [13] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualUint16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPUW512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualUint32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPUD512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualUint64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterEqualUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterEqualUint8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPUB512 [13] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(13)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterFloat32x16 x y)
-       // result: (VPMOVMToVec32x16 (VCMPPS512 [14] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (GreaterFloat32x4 x y)
-       // result: (VCMPPS128 [14] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS128)
-               v.AuxInt = uint8ToAuxInt(14)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (GreaterFloat32x8 x y)
-       // result: (VCMPPS256 [14] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS256)
-               v.AuxInt = uint8ToAuxInt(14)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (GreaterFloat64x2 x y)
-       // result: (VCMPPD128 [14] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD128)
-               v.AuxInt = uint8ToAuxInt(14)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (GreaterFloat64x4 x y)
-       // result: (VCMPPD256 [14] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD256)
-               v.AuxInt = uint8ToAuxInt(14)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterFloat64x8 x y)
-       // result: (VPMOVMToVec64x8 (VCMPPD512 [14] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterInt16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPGTW512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTW512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterInt32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPGTD512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTD512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterInt64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPGTQ512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTQ512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterInt8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPGTB512 x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTB512, typ.Mask)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedFloat32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [14] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedFloat32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [14] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedFloat32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [14] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedFloat64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [14] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedFloat64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [14] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedFloat64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [14] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [14] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [14] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [14] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [14] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [14] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [14] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [14] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [14] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [14] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [14] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [14] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedInt8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [14] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [14] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [14] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [14] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [14] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [14] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [14] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [14] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [14] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [14] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [14] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [14] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterMaskedUint8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [14] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterUint16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPUW512 [14] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterUint32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPUD512 [14] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterUint64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [14] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpGreaterUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterUint8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPUB512 [14] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(14)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpHasCPUFeature(v *Value) bool {
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (HasCPUFeature {s})
-       // result: (SETNE (CMPLconst [0] (LoweredHasCPUFeature {s})))
-       for {
-               s := auxToSym(v.Aux)
-               v.reset(OpAMD64SETNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v0.AuxInt = int32ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64LoweredHasCPUFeature, typ.UInt64)
-               v1.Aux = symToAux(s)
-               v0.AddArg(v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsInBounds(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (IsInBounds idx len)
-       // result: (SETB (CMPQ idx len))
-       for {
-               idx := v_0
-               len := v_1
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(idx, len)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (IsNanFloat32x16 x y)
-       // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(3)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (IsNanFloat32x4 x y)
-       // result: (VCMPPS128 [3] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS128)
-               v.AuxInt = uint8ToAuxInt(3)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (IsNanFloat32x8 x y)
-       // result: (VCMPPS256 [3] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS256)
-               v.AuxInt = uint8ToAuxInt(3)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (IsNanFloat64x2 x y)
-       // result: (VCMPPD128 [3] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD128)
-               v.AuxInt = uint8ToAuxInt(3)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (IsNanFloat64x4 x y)
-       // result: (VCMPPD256 [3] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD256)
-               v.AuxInt = uint8ToAuxInt(3)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (IsNanFloat64x8 x y)
-       // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(3)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (IsNanMaskedFloat32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [3] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(3)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (IsNanMaskedFloat32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [3] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(3)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (IsNanMaskedFloat32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [3] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(3)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (IsNanMaskedFloat64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [3] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(3)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (IsNanMaskedFloat64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [3] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(3)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNanMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (IsNanMaskedFloat64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [3] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(3)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsNonNil(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (IsNonNil p)
-       // result: (SETNE (TESTQ p p))
-       for {
-               p := v_0
-               v.reset(OpAMD64SETNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64TESTQ, types.TypeFlags)
-               v0.AddArg2(p, p)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpIsSliceInBounds(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (IsSliceInBounds idx len)
-       // result: (SETBE (CMPQ idx len))
-       for {
-               idx := v_0
-               len := v_1
-               v.reset(OpAMD64SETBE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(idx, len)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq16 x y)
-       // result: (SETLE (CMPW x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETLE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq16U(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq16U x y)
-       // result: (SETBE (CMPW x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETBE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq32 x y)
-       // result: (SETLE (CMPL x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETLE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq32F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq32F x y)
-       // result: (SETGEF (UCOMISS y x))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETGEF)
-               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags)
-               v0.AddArg2(y, x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq32U(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq32U x y)
-       // result: (SETBE (CMPL x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETBE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq64 x y)
-       // result: (SETLE (CMPQ x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETLE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq64F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq64F x y)
-       // result: (SETGEF (UCOMISD y x))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETGEF)
-               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags)
-               v0.AddArg2(y, x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq64U(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq64U x y)
-       // result: (SETBE (CMPQ x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETBE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq8 x y)
-       // result: (SETLE (CMPB x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETLE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLeq8U(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Leq8U x y)
-       // result: (SETBE (CMPB x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETBE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less16 x y)
-       // result: (SETL (CMPW x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETL)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess16U(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less16U x y)
-       // result: (SETB (CMPW x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less32 x y)
-       // result: (SETL (CMPL x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETL)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess32F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less32F x y)
-       // result: (SETGF (UCOMISS y x))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETGF)
-               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags)
-               v0.AddArg2(y, x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess32U(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less32U x y)
-       // result: (SETB (CMPL x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less64 x y)
-       // result: (SETL (CMPQ x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETL)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess64F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less64F x y)
-       // result: (SETGF (UCOMISD y x))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETGF)
-               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags)
-               v0.AddArg2(y, x)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess64U(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less64U x y)
-       // result: (SETB (CMPQ x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less8 x y)
-       // result: (SETL (CMPB x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETL)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLess8U(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Less8U x y)
-       // result: (SETB (CMPB x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETB)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualFloat32x16 x y)
-       // result: (VPMOVMToVec32x16 (VCMPPS512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (LessEqualFloat32x4 x y)
-       // result: (VCMPPS128 [2] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS128)
-               v.AuxInt = uint8ToAuxInt(2)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (LessEqualFloat32x8 x y)
-       // result: (VCMPPS256 [2] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS256)
-               v.AuxInt = uint8ToAuxInt(2)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (LessEqualFloat64x2 x y)
-       // result: (VCMPPD128 [2] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD128)
-               v.AuxInt = uint8ToAuxInt(2)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (LessEqualFloat64x4 x y)
-       // result: (VCMPPD256 [2] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD256)
-               v.AuxInt = uint8ToAuxInt(2)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualFloat64x8 x y)
-       // result: (VPMOVMToVec64x8 (VCMPPD512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualInt16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPW512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualInt32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPD512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualInt64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPQ512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualInt8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPB512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedFloat32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [2] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedFloat32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedFloat32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedFloat64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [2] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedFloat64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [2] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedFloat64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [2] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [2] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [2] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [2] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [2] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [2] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [2] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [2] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [2] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [2] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [2] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [2] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedInt8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [2] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [2] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [2] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [2] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [2] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [2] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [2] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [2] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [2] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [2] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [2] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [2] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualMaskedUint8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [2] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualUint16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPUW512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualUint32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPUD512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualUint64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessEqualUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessEqualUint8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPUB512 [2] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(2)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessFloat32x16 x y)
-       // result: (VPMOVMToVec32x16 (VCMPPS512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (LessFloat32x4 x y)
-       // result: (VCMPPS128 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (LessFloat32x8 x y)
-       // result: (VCMPPS256 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (LessFloat64x2 x y)
-       // result: (VCMPPD128 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD128)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (LessFloat64x4 x y)
-       // result: (VCMPPD256 [1] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessFloat64x8 x y)
-       // result: (VPMOVMToVec64x8 (VCMPPD512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessInt16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPW512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessInt32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPD512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessInt64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPQ512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessInt8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPB512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedFloat32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [1] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedFloat32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [1] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedFloat32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [1] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedFloat64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [1] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedFloat64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [1] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedFloat64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [1] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [1] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [1] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [1] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [1] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [1] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [1] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [1] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [1] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [1] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [1] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [1] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedInt8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [1] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [1] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [1] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [1] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [1] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [1] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [1] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [1] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [1] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [1] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [1] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [1] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessMaskedUint8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [1] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessUint16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPUW512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessUint32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPUD512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessUint64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLessUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LessUint8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPUB512 [1] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(1)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoad(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (Load <t> ptr mem)
-       // cond: (is64BitInt(t) || isPtr(t))
-       // result: (MOVQload ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(is64BitInt(t) || isPtr(t)) {
-                       break
-               }
-               v.reset(OpAMD64MOVQload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (Load <t> ptr mem)
-       // cond: is32BitInt(t)
-       // result: (MOVLload ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(is32BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64MOVLload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (Load <t> ptr mem)
-       // cond: is16BitInt(t)
-       // result: (MOVWload ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(is16BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64MOVWload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (Load <t> ptr mem)
-       // cond: (t.IsBoolean() || is8BitInt(t))
-       // result: (MOVBload ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(t.IsBoolean() || is8BitInt(t)) {
-                       break
-               }
-               v.reset(OpAMD64MOVBload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (Load <t> ptr mem)
-       // cond: is32BitFloat(t)
-       // result: (MOVSSload ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(is32BitFloat(t)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSSload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (Load <t> ptr mem)
-       // cond: is64BitFloat(t)
-       // result: (MOVSDload ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(is64BitFloat(t)) {
-                       break
-               }
-               v.reset(OpAMD64MOVSDload)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (Load <t> ptr mem)
-       // cond: t.Size() == 16
-       // result: (VMOVDQUload128 ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(t.Size() == 16) {
-                       break
-               }
-               v.reset(OpAMD64VMOVDQUload128)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (Load <t> ptr mem)
-       // cond: t.Size() == 32
-       // result: (VMOVDQUload256 ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(t.Size() == 32) {
-                       break
-               }
-               v.reset(OpAMD64VMOVDQUload256)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       // match: (Load <t> ptr mem)
-       // cond: t.Size() == 64
-       // result: (VMOVDQUload512 ptr mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VMOVDQUload512)
-               v.AddArg2(ptr, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLoadMask16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask16x16 <t> ptr mem)
-       // result: (VPMOVMToVec16x16 <types.TypeVec256> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v.Type = types.TypeVec256
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask16x32 <t> ptr mem)
-       // result: (VPMOVMToVec16x32 <types.TypeVec512> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v.Type = types.TypeVec512
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask16x8 <t> ptr mem)
-       // result: (VPMOVMToVec16x8 <types.TypeVec128> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v.Type = types.TypeVec128
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask32x16 <t> ptr mem)
-       // result: (VPMOVMToVec32x16 <types.TypeVec512> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v.Type = types.TypeVec512
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask32x4 <t> ptr mem)
-       // result: (VPMOVMToVec32x4 <types.TypeVec128> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v.Type = types.TypeVec128
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask32x8 <t> ptr mem)
-       // result: (VPMOVMToVec32x8 <types.TypeVec256> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v.Type = types.TypeVec256
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask64x2 <t> ptr mem)
-       // result: (VPMOVMToVec64x2 <types.TypeVec128> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v.Type = types.TypeVec128
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask64x4 <t> ptr mem)
-       // result: (VPMOVMToVec64x4 <types.TypeVec256> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v.Type = types.TypeVec256
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask64x8 <t> ptr mem)
-       // result: (VPMOVMToVec64x8 <types.TypeVec512> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v.Type = types.TypeVec512
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask8x16 <t> ptr mem)
-       // result: (VPMOVMToVec8x16 <types.TypeVec128> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v.Type = types.TypeVec128
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask8x32 <t> ptr mem)
-       // result: (VPMOVMToVec8x32 <types.TypeVec256> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v.Type = types.TypeVec256
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMask8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMask8x64 <t> ptr mem)
-       // result: (VPMOVMToVec8x64 <types.TypeVec512> (KMOVQload <t> ptr mem))
-       for {
-               t := v.Type
-               ptr := v_0
-               mem := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v.Type = types.TypeVec512
-               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
-               v0.AddArg2(ptr, mem)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpLoadMasked16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMasked16 <t> ptr mask mem)
-       // cond: t.Size() == 64
-       // result: (VPMASK16load512 ptr (VPMOVVec16x32ToM <types.TypeMask> mask) mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mask := v_1
-               mem := v_2
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK16load512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(ptr, v0, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLoadMasked32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMasked32 <t> ptr mask mem)
-       // cond: t.Size() == 16
-       // result: (VPMASK32load128 ptr mask mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mask := v_1
-               mem := v_2
-               if !(t.Size() == 16) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK32load128)
-               v.AddArg3(ptr, mask, mem)
-               return true
-       }
-       // match: (LoadMasked32 <t> ptr mask mem)
-       // cond: t.Size() == 32
-       // result: (VPMASK32load256 ptr mask mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mask := v_1
-               mem := v_2
-               if !(t.Size() == 32) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK32load256)
-               v.AddArg3(ptr, mask, mem)
-               return true
-       }
-       // match: (LoadMasked32 <t> ptr mask mem)
-       // cond: t.Size() == 64
-       // result: (VPMASK32load512 ptr (VPMOVVec32x16ToM <types.TypeMask> mask) mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mask := v_1
-               mem := v_2
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK32load512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(ptr, v0, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLoadMasked64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMasked64 <t> ptr mask mem)
-       // cond: t.Size() == 16
-       // result: (VPMASK64load128 ptr mask mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mask := v_1
-               mem := v_2
-               if !(t.Size() == 16) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK64load128)
-               v.AddArg3(ptr, mask, mem)
-               return true
-       }
-       // match: (LoadMasked64 <t> ptr mask mem)
-       // cond: t.Size() == 32
-       // result: (VPMASK64load256 ptr mask mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mask := v_1
-               mem := v_2
-               if !(t.Size() == 32) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK64load256)
-               v.AddArg3(ptr, mask, mem)
-               return true
-       }
-       // match: (LoadMasked64 <t> ptr mask mem)
-       // cond: t.Size() == 64
-       // result: (VPMASK64load512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mask := v_1
-               mem := v_2
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK64load512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(ptr, v0, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLoadMasked8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (LoadMasked8 <t> ptr mask mem)
-       // cond: t.Size() == 64
-       // result: (VPMASK8load512 ptr (VPMOVVec8x64ToM <types.TypeMask> mask) mem)
-       for {
-               t := v.Type
-               ptr := v_0
-               mask := v_1
-               mem := v_2
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK8load512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(ptr, v0, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLocalAddr(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (LocalAddr <t> {sym} base mem)
-       // cond: t.Elem().HasPointers()
-       // result: (LEAQ {sym} (SPanchored base mem))
-       for {
-               t := v.Type
-               sym := auxToSym(v.Aux)
-               base := v_0
-               mem := v_1
-               if !(t.Elem().HasPointers()) {
-                       break
-               }
-               v.reset(OpAMD64LEAQ)
-               v.Aux = symToAux(sym)
-               v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr)
-               v0.AddArg2(base, mem)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (LocalAddr <t> {sym} base _)
-       // cond: !t.Elem().HasPointers()
-       // result: (LEAQ {sym} base)
-       for {
-               t := v.Type
-               sym := auxToSym(v.Aux)
-               base := v_0
-               if !(!t.Elem().HasPointers()) {
-                       break
-               }
-               v.reset(OpAMD64LEAQ)
-               v.Aux = symToAux(sym)
-               v.AddArg(base)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh16x16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v2.AuxInt = int16ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh16x16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh16x32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh16x32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh16x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh16x64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh16x64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh16x8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v2.AuxInt = int8ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh16x8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh32x16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v2.AuxInt = int16ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh32x16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh32x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh32x32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh32x32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh32x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh32x64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh32x64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh32x8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v2.AuxInt = int8ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh32x8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh64x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh64x16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPWconst y [64])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v2.AuxInt = int16ToAuxInt(64)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh64x16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLQ x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLQ)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh64x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh64x32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPLconst y [64])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(64)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh64x32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLQ x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLQ)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh64x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh64x64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPQconst y [64])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(64)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh64x64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLQ x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLQ)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh64x8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPBconst y [64])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v2.AuxInt = int8ToAuxInt(64)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh64x8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLQ x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLQ)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh8x16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v2.AuxInt = int16ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh8x16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh8x32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh8x32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh8x64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh8x64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpLsh8x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Lsh8x8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v2.AuxInt = int8ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Lsh8x8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHLL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHLL)
-               v.AddArg2(x, y)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpMax32F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Max32F <t> x y)
-       // result: (Neg32F <t> (Min32F <t> (Neg32F <t> x) (Neg32F <t> y)))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               v.reset(OpNeg32F)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpMin32F, t)
-               v1 := b.NewValue0(v.Pos, OpNeg32F, t)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpNeg32F, t)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMax64F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Max64F <t> x y)
-       // result: (Neg64F <t> (Min64F <t> (Neg64F <t> x) (Neg64F <t> y)))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               v.reset(OpNeg64F)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpMin64F, t)
-               v1 := b.NewValue0(v.Pos, OpNeg64F, t)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpNeg64F, t)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedFloat32x16 x y mask)
-       // result: (VMAXPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMAXPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedFloat32x4 x y mask)
-       // result: (VMAXPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMAXPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedFloat32x8 x y mask)
-       // result: (VMAXPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMAXPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedFloat64x2 x y mask)
-       // result: (VMAXPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMAXPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedFloat64x4 x y mask)
-       // result: (VMAXPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMAXPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedFloat64x8 x y mask)
-       // result: (VMAXPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMAXPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt16x16 x y mask)
-       // result: (VPMAXSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt16x32 x y mask)
-       // result: (VPMAXSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt16x8 x y mask)
-       // result: (VPMAXSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt32x16 x y mask)
-       // result: (VPMAXSDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt32x4 x y mask)
-       // result: (VPMAXSDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt32x8 x y mask)
-       // result: (VPMAXSDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt64x2 x y mask)
-       // result: (VPMAXSQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt64x4 x y mask)
-       // result: (VPMAXSQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt64x8 x y mask)
-       // result: (VPMAXSQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt8x16 x y mask)
-       // result: (VPMAXSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt8x32 x y mask)
-       // result: (VPMAXSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedInt8x64 x y mask)
-       // result: (VPMAXSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXSBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint16x16 x y mask)
-       // result: (VPMAXUWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint16x32 x y mask)
-       // result: (VPMAXUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint16x8 x y mask)
-       // result: (VPMAXUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint32x16 x y mask)
-       // result: (VPMAXUDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint32x4 x y mask)
-       // result: (VPMAXUDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint32x8 x y mask)
-       // result: (VPMAXUDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint64x2 x y mask)
-       // result: (VPMAXUQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint64x4 x y mask)
-       // result: (VPMAXUQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint64x8 x y mask)
-       // result: (VPMAXUQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint8x16 x y mask)
-       // result: (VPMAXUBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint8x32 x y mask)
-       // result: (VPMAXUBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMaxMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MaxMaskedUint8x64 x y mask)
-       // result: (VPMAXUBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMAXUBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMin32F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Min32F <t> x y)
-       // result: (POR (MINSS <t> (MINSS <t> x y) x) (MINSS <t> x y))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64POR)
-               v0 := b.NewValue0(v.Pos, OpAMD64MINSS, t)
-               v1 := b.NewValue0(v.Pos, OpAMD64MINSS, t)
-               v1.AddArg2(x, y)
-               v0.AddArg2(v1, x)
-               v.AddArg2(v0, v1)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMin64F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Min64F <t> x y)
-       // result: (POR (MINSD <t> (MINSD <t> x y) x) (MINSD <t> x y))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64POR)
-               v0 := b.NewValue0(v.Pos, OpAMD64MINSD, t)
-               v1 := b.NewValue0(v.Pos, OpAMD64MINSD, t)
-               v1.AddArg2(x, y)
-               v0.AddArg2(v1, x)
-               v.AddArg2(v0, v1)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedFloat32x16 x y mask)
-       // result: (VMINPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMINPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedFloat32x4 x y mask)
-       // result: (VMINPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMINPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedFloat32x8 x y mask)
-       // result: (VMINPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMINPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedFloat64x2 x y mask)
-       // result: (VMINPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMINPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedFloat64x4 x y mask)
-       // result: (VMINPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMINPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedFloat64x8 x y mask)
-       // result: (VMINPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMINPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt16x16 x y mask)
-       // result: (VPMINSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt16x32 x y mask)
-       // result: (VPMINSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt16x8 x y mask)
-       // result: (VPMINSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt32x16 x y mask)
-       // result: (VPMINSDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt32x4 x y mask)
-       // result: (VPMINSDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt32x8 x y mask)
-       // result: (VPMINSDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt64x2 x y mask)
-       // result: (VPMINSQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt64x4 x y mask)
-       // result: (VPMINSQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt64x8 x y mask)
-       // result: (VPMINSQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt8x16 x y mask)
-       // result: (VPMINSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt8x32 x y mask)
-       // result: (VPMINSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedInt8x64 x y mask)
-       // result: (VPMINSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINSBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint16x16 x y mask)
-       // result: (VPMINUWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint16x32 x y mask)
-       // result: (VPMINUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint16x8 x y mask)
-       // result: (VPMINUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint32x16 x y mask)
-       // result: (VPMINUDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint32x4 x y mask)
-       // result: (VPMINUDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint32x8 x y mask)
-       // result: (VPMINUDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint64x2 x y mask)
-       // result: (VPMINUQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint64x4 x y mask)
-       // result: (VPMINUQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint64x8 x y mask)
-       // result: (VPMINUQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint8x16 x y mask)
-       // result: (VPMINUBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint8x32 x y mask)
-       // result: (VPMINUBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMinMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MinMaskedUint8x64 x y mask)
-       // result: (VPMINUBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMINUBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMod16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Mod16 [a] x y)
-       // result: (Select1 (DIVW [a] x y))
-       for {
-               a := auxIntToBool(v.AuxInt)
-               x := v_0
-               y := v_1
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16))
-               v0.AuxInt = boolToAuxInt(a)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMod16u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Mod16u x y)
-       // result: (Select1 (DIVWU x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16))
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMod32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Mod32 [a] x y)
-       // result: (Select1 (DIVL [a] x y))
-       for {
-               a := auxIntToBool(v.AuxInt)
-               x := v_0
-               y := v_1
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVL, types.NewTuple(typ.Int32, typ.Int32))
-               v0.AuxInt = boolToAuxInt(a)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMod32u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Mod32u x y)
-       // result: (Select1 (DIVLU x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVLU, types.NewTuple(typ.UInt32, typ.UInt32))
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMod64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Mod64 [a] x y)
-       // result: (Select1 (DIVQ [a] x y))
-       for {
-               a := auxIntToBool(v.AuxInt)
-               x := v_0
-               y := v_1
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVQ, types.NewTuple(typ.Int64, typ.Int64))
-               v0.AuxInt = boolToAuxInt(a)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMod64u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Mod64u x y)
-       // result: (Select1 (DIVQU x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVQU, types.NewTuple(typ.UInt64, typ.UInt64))
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMod8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Mod8 x y)
-       // result: (Select1 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16))
-               v1 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMod8u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Mod8u x y)
-       // result: (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpSelect1)
-               v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16))
-               v1 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16)
-               v2.AddArg(y)
-               v0.AddArg2(v1, v2)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMove(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Move [0] _ _ mem)
-       // result: mem
-       for {
-               if auxIntToInt64(v.AuxInt) != 0 {
-                       break
-               }
-               mem := v_2
-               v.copyOf(mem)
-               return true
-       }
-       // match: (Move [1] dst src mem)
-       // result: (MOVBstore dst (MOVBload src mem) mem)
-       for {
-               if auxIntToInt64(v.AuxInt) != 1 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVBstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
-               v0.AddArg2(src, mem)
-               v.AddArg3(dst, v0, mem)
-               return true
-       }
-       // match: (Move [2] dst src mem)
-       // result: (MOVWstore dst (MOVWload src mem) mem)
-       for {
-               if auxIntToInt64(v.AuxInt) != 2 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVWstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
-               v0.AddArg2(src, mem)
-               v.AddArg3(dst, v0, mem)
-               return true
-       }
-       // match: (Move [4] dst src mem)
-       // result: (MOVLstore dst (MOVLload src mem) mem)
-       for {
-               if auxIntToInt64(v.AuxInt) != 4 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVLstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
-               v0.AddArg2(src, mem)
-               v.AddArg3(dst, v0, mem)
-               return true
-       }
-       // match: (Move [8] dst src mem)
-       // result: (MOVQstore dst (MOVQload src mem) mem)
-       for {
-               if auxIntToInt64(v.AuxInt) != 8 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
-               v0.AddArg2(src, mem)
-               v.AddArg3(dst, v0, mem)
-               return true
-       }
-       // match: (Move [16] dst src mem)
-       // result: (MOVOstore dst (MOVOload src mem) mem)
-       for {
-               if auxIntToInt64(v.AuxInt) != 16 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVOstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVOload, types.TypeInt128)
-               v0.AddArg2(src, mem)
-               v.AddArg3(dst, v0, mem)
-               return true
-       }
-       // match: (Move [3] dst src mem)
-       // result: (MOVBstore [2] dst (MOVBload [2] src mem) (MOVWstore dst (MOVWload src mem) mem))
-       for {
-               if auxIntToInt64(v.AuxInt) != 3 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = int32ToAuxInt(2)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
-               v0.AuxInt = int32ToAuxInt(2)
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVWstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [5] dst src mem)
-       // result: (MOVBstore [4] dst (MOVBload [4] src mem) (MOVLstore dst (MOVLload src mem) mem))
-       for {
-               if auxIntToInt64(v.AuxInt) != 5 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = int32ToAuxInt(4)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
-               v0.AuxInt = int32ToAuxInt(4)
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [6] dst src mem)
-       // result: (MOVWstore [4] dst (MOVWload [4] src mem) (MOVLstore dst (MOVLload src mem) mem))
-       for {
-               if auxIntToInt64(v.AuxInt) != 6 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(4)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
-               v0.AuxInt = int32ToAuxInt(4)
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [7] dst src mem)
-       // result: (MOVLstore [3] dst (MOVLload [3] src mem) (MOVLstore dst (MOVLload src mem) mem))
-       for {
-               if auxIntToInt64(v.AuxInt) != 7 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(3)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
-               v0.AuxInt = int32ToAuxInt(3)
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [9] dst src mem)
-       // result: (MOVBstore [8] dst (MOVBload [8] src mem) (MOVQstore dst (MOVQload src mem) mem))
-       for {
-               if auxIntToInt64(v.AuxInt) != 9 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVBstore)
-               v.AuxInt = int32ToAuxInt(8)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
-               v0.AuxInt = int32ToAuxInt(8)
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [10] dst src mem)
-       // result: (MOVWstore [8] dst (MOVWload [8] src mem) (MOVQstore dst (MOVQload src mem) mem))
-       for {
-               if auxIntToInt64(v.AuxInt) != 10 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVWstore)
-               v.AuxInt = int32ToAuxInt(8)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
-               v0.AuxInt = int32ToAuxInt(8)
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [11] dst src mem)
-       // result: (MOVLstore [7] dst (MOVLload [7] src mem) (MOVQstore dst (MOVQload src mem) mem))
-       for {
-               if auxIntToInt64(v.AuxInt) != 11 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(7)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
-               v0.AuxInt = int32ToAuxInt(7)
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [12] dst src mem)
-       // result: (MOVLstore [8] dst (MOVLload [8] src mem) (MOVQstore dst (MOVQload src mem) mem))
-       for {
-               if auxIntToInt64(v.AuxInt) != 12 {
-                       break
-               }
-               dst := v_0
-               src := v_1
-               mem := v_2
-               v.reset(OpAMD64MOVLstore)
-               v.AuxInt = int32ToAuxInt(8)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
-               v0.AuxInt = int32ToAuxInt(8)
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [s] dst src mem)
-       // cond: s >= 13 && s <= 15
-       // result: (MOVQstore [int32(s-8)] dst (MOVQload [int32(s-8)] src mem) (MOVQstore dst (MOVQload src mem) mem))
-       for {
-               s := auxIntToInt64(v.AuxInt)
-               dst := v_0
-               src := v_1
-               mem := v_2
-               if !(s >= 13 && s <= 15) {
-                       break
-               }
-               v.reset(OpAMD64MOVQstore)
-               v.AuxInt = int32ToAuxInt(int32(s - 8))
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
-               v0.AuxInt = int32ToAuxInt(int32(s - 8))
-               v0.AddArg2(src, mem)
-               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
-               v2.AddArg2(src, mem)
-               v1.AddArg3(dst, v2, mem)
-               v.AddArg3(dst, v0, v1)
-               return true
-       }
-       // match: (Move [s] dst src mem)
-       // cond: s > 16 && s < 192 && logLargeCopy(v, s)
-       // result: (LoweredMove [s] dst src mem)
-       for {
-               s := auxIntToInt64(v.AuxInt)
-               dst := v_0
-               src := v_1
-               mem := v_2
-               if !(s > 16 && s < 192 && logLargeCopy(v, s)) {
-                       break
-               }
-               v.reset(OpAMD64LoweredMove)
-               v.AuxInt = int64ToAuxInt(s)
-               v.AddArg3(dst, src, mem)
-               return true
-       }
-       // match: (Move [s] dst src mem)
-       // cond: s >= 192 && s <= repMoveThreshold && logLargeCopy(v, s)
-       // result: (LoweredMoveLoop [s] dst src mem)
-       for {
-               s := auxIntToInt64(v.AuxInt)
-               dst := v_0
-               src := v_1
-               mem := v_2
-               if !(s >= 192 && s <= repMoveThreshold && logLargeCopy(v, s)) {
-                       break
-               }
-               v.reset(OpAMD64LoweredMoveLoop)
-               v.AuxInt = int64ToAuxInt(s)
-               v.AddArg3(dst, src, mem)
-               return true
-       }
-       // match: (Move [s] dst src mem)
-       // cond: s > repMoveThreshold && s%8 != 0
-       // result: (Move [s-s%8] (OffPtr <dst.Type> dst [s%8]) (OffPtr <src.Type> src [s%8]) (MOVQstore dst (MOVQload src mem) mem))
-       for {
-               s := auxIntToInt64(v.AuxInt)
-               dst := v_0
-               src := v_1
-               mem := v_2
-               if !(s > repMoveThreshold && s%8 != 0) {
-                       break
-               }
-               v.reset(OpMove)
-               v.AuxInt = int64ToAuxInt(s - s%8)
-               v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
-               v0.AuxInt = int64ToAuxInt(s % 8)
-               v0.AddArg(dst)
-               v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
-               v1.AuxInt = int64ToAuxInt(s % 8)
-               v1.AddArg(src)
-               v2 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
-               v3 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
-               v3.AddArg2(src, mem)
-               v2.AddArg3(dst, v3, mem)
-               v.AddArg3(v0, v1, v2)
-               return true
-       }
-       // match: (Move [s] dst src mem)
-       // cond: s > repMoveThreshold && s%8 == 0 && logLargeCopy(v, s)
-       // result: (REPMOVSQ dst src (MOVQconst [s/8]) mem)
-       for {
-               s := auxIntToInt64(v.AuxInt)
-               dst := v_0
-               src := v_1
-               mem := v_2
-               if !(s > repMoveThreshold && s%8 == 0 && logLargeCopy(v, s)) {
-                       break
-               }
-               v.reset(OpAMD64REPMOVSQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v0.AuxInt = int64ToAuxInt(s / 8)
-               v.AddArg4(dst, src, v0, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpMulAddMaskedFloat32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddMaskedFloat32x16 x y z mask)
-       // result: (VFMADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADD213PSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddMaskedFloat32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddMaskedFloat32x4 x y z mask)
-       // result: (VFMADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADD213PSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddMaskedFloat32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddMaskedFloat32x8 x y z mask)
-       // result: (VFMADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADD213PSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddMaskedFloat64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddMaskedFloat64x2 x y z mask)
-       // result: (VFMADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADD213PDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddMaskedFloat64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddMaskedFloat64x4 x y z mask)
-       // result: (VFMADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADD213PDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddMaskedFloat64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddMaskedFloat64x8 x y z mask)
-       // result: (VFMADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADD213PDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddSubMaskedFloat32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddSubMaskedFloat32x16 x y z mask)
-       // result: (VFMADDSUB213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADDSUB213PSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddSubMaskedFloat32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddSubMaskedFloat32x4 x y z mask)
-       // result: (VFMADDSUB213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADDSUB213PSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddSubMaskedFloat32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddSubMaskedFloat32x8 x y z mask)
-       // result: (VFMADDSUB213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADDSUB213PSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddSubMaskedFloat64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddSubMaskedFloat64x2 x y z mask)
-       // result: (VFMADDSUB213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADDSUB213PDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddSubMaskedFloat64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddSubMaskedFloat64x4 x y z mask)
-       // result: (VFMADDSUB213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADDSUB213PDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulAddSubMaskedFloat64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulAddSubMaskedFloat64x8 x y z mask)
-       // result: (VFMADDSUB213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMADDSUB213PDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulHighMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulHighMaskedInt16x16 x y mask)
-       // result: (VPMULHWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULHWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulHighMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulHighMaskedInt16x32 x y mask)
-       // result: (VPMULHWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULHWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulHighMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulHighMaskedInt16x8 x y mask)
-       // result: (VPMULHWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULHWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulHighMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulHighMaskedUint16x16 x y mask)
-       // result: (VPMULHUWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULHUWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulHighMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulHighMaskedUint16x32 x y mask)
-       // result: (VPMULHUWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULHUWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulHighMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulHighMaskedUint16x8 x y mask)
-       // result: (VPMULHUWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULHUWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedFloat32x16 x y mask)
-       // result: (VMULPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMULPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedFloat32x4 x y mask)
-       // result: (VMULPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMULPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedFloat32x8 x y mask)
-       // result: (VMULPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMULPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedFloat64x2 x y mask)
-       // result: (VMULPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMULPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedFloat64x4 x y mask)
-       // result: (VMULPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMULPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedFloat64x8 x y mask)
-       // result: (VMULPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VMULPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt16x16 x y mask)
-       // result: (VPMULLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt16x32 x y mask)
-       // result: (VPMULLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt16x8 x y mask)
-       // result: (VPMULLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt32x16 x y mask)
-       // result: (VPMULLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt32x4 x y mask)
-       // result: (VPMULLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt32x8 x y mask)
-       // result: (VPMULLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt64x2 x y mask)
-       // result: (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt64x4 x y mask)
-       // result: (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedInt64x8 x y mask)
-       // result: (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint16x16 x y mask)
-       // result: (VPMULLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint16x32 x y mask)
-       // result: (VPMULLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint16x8 x y mask)
-       // result: (VPMULLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint32x16 x y mask)
-       // result: (VPMULLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint32x4 x y mask)
-       // result: (VPMULLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint32x8 x y mask)
-       // result: (VPMULLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint64x2 x y mask)
-       // result: (VPMULLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint64x4 x y mask)
-       // result: (VPMULLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulMaskedUint64x8 x y mask)
-       // result: (VPMULLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMULLQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulSubAddMaskedFloat32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulSubAddMaskedFloat32x16 x y z mask)
-       // result: (VFMSUBADD213PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMSUBADD213PSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulSubAddMaskedFloat32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulSubAddMaskedFloat32x4 x y z mask)
-       // result: (VFMSUBADD213PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMSUBADD213PSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulSubAddMaskedFloat32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulSubAddMaskedFloat32x8 x y z mask)
-       // result: (VFMSUBADD213PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMSUBADD213PSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulSubAddMaskedFloat64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulSubAddMaskedFloat64x2 x y z mask)
-       // result: (VFMSUBADD213PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMSUBADD213PDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulSubAddMaskedFloat64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulSubAddMaskedFloat64x4 x y z mask)
-       // result: (VFMSUBADD213PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMSUBADD213PDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpMulSubAddMaskedFloat64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (MulSubAddMaskedFloat64x8 x y z mask)
-       // result: (VFMSUBADD213PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VFMSUBADD213PDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeg32F(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Neg32F x)
-       // result: (PXOR x (MOVSSconst <typ.Float32> [float32(math.Copysign(0, -1))]))
-       for {
-               x := v_0
-               v.reset(OpAMD64PXOR)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVSSconst, typ.Float32)
-               v0.AuxInt = float32ToAuxInt(float32(math.Copysign(0, -1)))
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeg64F(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (Neg64F x)
-       // result: (PXOR x (MOVSDconst <typ.Float64> [math.Copysign(0, -1)]))
-       for {
-               x := v_0
-               v.reset(OpAMD64PXOR)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVSDconst, typ.Float64)
-               v0.AuxInt = float64ToAuxInt(math.Copysign(0, -1))
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeq16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Neq16 x y)
-       // result: (SETNE (CMPW x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeq32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Neq32 x y)
-       // result: (SETNE (CMPL x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeq32F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Neq32F x y)
-       // result: (SETNEF (UCOMISS x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETNEF)
-               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeq64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Neq64 x y)
-       // result: (SETNE (CMPQ x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeq64F(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Neq64F x y)
-       // result: (SETNEF (UCOMISD x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETNEF)
-               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeq8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Neq8 x y)
-       // result: (SETNE (CMPB x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeqB(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (NeqB x y)
-       // result: (SETNE (CMPB x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNeqPtr(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (NeqPtr x y)
-       // result: (SETNE (CMPQ x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64SETNE)
-               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNot(v *Value) bool {
-       v_0 := v.Args[0]
-       // match: (Not x)
-       // result: (XORLconst [1] x)
-       for {
-               x := v_0
-               v.reset(OpAMD64XORLconst)
-               v.AuxInt = int32ToAuxInt(1)
-               v.AddArg(x)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualFloat32x16 x y)
-       // result: (VPMOVMToVec32x16 (VCMPPS512 [4] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (NotEqualFloat32x4 x y)
-       // result: (VCMPPS128 [4] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS128)
-               v.AuxInt = uint8ToAuxInt(4)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (NotEqualFloat32x8 x y)
-       // result: (VCMPPS256 [4] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPS256)
-               v.AuxInt = uint8ToAuxInt(4)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (NotEqualFloat64x2 x y)
-       // result: (VCMPPD128 [4] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD128)
-               v.AuxInt = uint8ToAuxInt(4)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (NotEqualFloat64x4 x y)
-       // result: (VCMPPD256 [4] x y)
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VCMPPD256)
-               v.AuxInt = uint8ToAuxInt(4)
-               v.AddArg2(x, y)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualFloat64x8 x y)
-       // result: (VPMOVMToVec64x8 (VCMPPD512 [4] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualInt16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPW512 [4] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualInt32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPD512 [4] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualInt64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPQ512 [4] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualInt8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPB512 [4] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedFloat32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VCMPPSMasked512 [4] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedFloat32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedFloat32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPSMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedFloat64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VCMPPDMasked128 [4] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedFloat64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VCMPPDMasked256 [4] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedFloat64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VCMPPDMasked512 [4] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPWMasked256 [4] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPWMasked512 [4] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPWMasked128 [4] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPDMasked512 [4] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPDMasked128 [4] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPDMasked256 [4] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPQMasked128 [4] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPQMasked256 [4] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPQMasked512 [4] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPBMasked128 [4] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPBMasked256 [4] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedInt8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPBMasked512 [4] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint16x16 x y mask)
-       // result: (VPMOVMToVec16x16 (VPCMPUWMasked256 [4] x y (VPMOVVec16x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint16x32 x y mask)
-       // result: (VPMOVMToVec16x32 (VPCMPUWMasked512 [4] x y (VPMOVVec16x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint16x8 x y mask)
-       // result: (VPMOVMToVec16x8 (VPCMPUWMasked128 [4] x y (VPMOVVec16x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUWMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint32x16 x y mask)
-       // result: (VPMOVMToVec32x16 (VPCMPUDMasked512 [4] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint32x4 x y mask)
-       // result: (VPMOVMToVec32x4 (VPCMPUDMasked128 [4] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint32x8 x y mask)
-       // result: (VPMOVMToVec32x8 (VPCMPUDMasked256 [4] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUDMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint64x2 x y mask)
-       // result: (VPMOVMToVec64x2 (VPCMPUQMasked128 [4] x y (VPMOVVec64x2ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint64x4 x y mask)
-       // result: (VPMOVMToVec64x4 (VPCMPUQMasked256 [4] x y (VPMOVVec64x4ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint64x8 x y mask)
-       // result: (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint8x16 x y mask)
-       // result: (VPMOVMToVec8x16 (VPCMPUBMasked128 [4] x y (VPMOVVec8x16ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked128, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint8x32 x y mask)
-       // result: (VPMOVMToVec8x32 (VPCMPUBMasked256 [4] x y (VPMOVVec8x32ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked256, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpNotEqualMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualMaskedUint8x64 x y mask)
-       // result: (VPMOVMToVec8x64 (VPCMPUBMasked512 [4] x y (VPMOVVec8x64ToM <types.TypeMask> mask)))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUBMasked512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v1 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v1.AddArg(mask)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
+               v.reset(OpAMD64CMOVLCS)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpNotEqualUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualUint16x32 x y)
-       // result: (VPMOVMToVec16x32 (VPCMPUW512 [4] x y))
+       // match: (CondSelect <t> x y (SETAE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLCC y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
+               if v_2.Op != OpAMD64SETAE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLCC)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpNotEqualUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualUint32x16 x y)
-       // result: (VPMOVMToVec32x16 (VPCMPUD512 [4] x y))
+       // match: (CondSelect <t> x y (SETBE cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLLS y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
+               if v_2.Op != OpAMD64SETBE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLLS)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpNotEqualUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualUint64x8 x y)
-       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y))
+       // match: (CondSelect <t> x y (SETEQF cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLEQF y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
+               if v_2.Op != OpAMD64SETEQF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLEQF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpNotEqualUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (NotEqualUint8x64 x y)
-       // result: (VPMOVMToVec8x64 (VPCMPUB512 [4] x y))
+       // match: (CondSelect <t> x y (SETNEF cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLNEF y x cond)
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x64)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
-               v0.AuxInt = uint8ToAuxInt(4)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
+               if v_2.Op != OpAMD64SETNEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLNEF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOffPtr(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (OffPtr [off] ptr)
-       // cond: is32Bit(off)
-       // result: (ADDQconst [int32(off)] ptr)
+       // match: (CondSelect <t> x y (SETGF cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLGTF y x cond)
        for {
-               off := auxIntToInt64(v.AuxInt)
-               ptr := v_0
-               if !(is32Bit(off)) {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if v_2.Op != OpAMD64SETGF {
                        break
                }
-               v.reset(OpAMD64ADDQconst)
-               v.AuxInt = int32ToAuxInt(int32(off))
-               v.AddArg(ptr)
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLGTF)
+               v.AddArg3(y, x, cond)
                return true
        }
-       // match: (OffPtr [off] ptr)
-       // result: (ADDQ (MOVQconst [off]) ptr)
+       // match: (CondSelect <t> x y (SETGEF cond))
+       // cond: is32BitInt(t)
+       // result: (CMOVLGEF y x cond)
        for {
-               off := auxIntToInt64(v.AuxInt)
-               ptr := v_0
-               v.reset(OpAMD64ADDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v0.AuxInt = int64ToAuxInt(off)
-               v.AddArg2(v0, ptr)
+               t := v.Type
+               x := v_0
+               y := v_1
+               if v_2.Op != OpAMD64SETGEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLGEF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt16x16 x mask)
-       // result: (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETEQ cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWEQ y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETEQ {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWEQ)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt16x32 x mask)
-       // result: (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETNE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWNE y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETNE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWNE)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt16x8 x mask)
-       // result: (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETL cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWLT y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETL {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWLT)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt32x16 x mask)
-       // result: (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETG cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWGT y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETG {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWGT)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt32x4 x mask)
-       // result: (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETLE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWLE y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETLE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWLE)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt32x8 x mask)
-       // result: (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETGE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWGE y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETGE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWGE)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt64x2 x mask)
-       // result: (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETA cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWHI y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETA {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWHI)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt64x4 x mask)
-       // result: (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETB cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWCS y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETB {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWCS)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt64x8 x mask)
-       // result: (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETAE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWCC y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETAE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWCC)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt8x16 x mask)
-       // result: (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETBE cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWLS y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETBE {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWLS)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt8x32 x mask)
-       // result: (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETEQF cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWEQF y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETEQF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWEQF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedInt8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedInt8x64 x mask)
-       // result: (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETNEF cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWNEF y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETNEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWNEF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedUint16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint16x16 x mask)
-       // result: (VPOPCNTWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETGF cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWGTF y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETGF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWGTF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedUint16x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint16x32 x mask)
-       // result: (VPOPCNTWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y (SETGEF cond))
+       // cond: is16BitInt(t)
+       // result: (CMOVWGEF y x cond)
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               if v_2.Op != OpAMD64SETGEF {
+                       break
+               }
+               cond := v_2.Args[0]
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWGEF)
+               v.AddArg3(y, x, cond)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedUint16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint16x8 x mask)
-       // result: (VPOPCNTWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 1
+       // result: (CondSelect <t> x y (MOVBQZX <typ.UInt64> check))
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               check := v_2
+               if !(!check.Type.IsFlags() && check.Type.Size() == 1) {
+                       break
+               }
+               v.reset(OpCondSelect)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt64)
+               v0.AddArg(check)
+               v.AddArg3(x, y, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint32x16 x mask)
-       // result: (VPOPCNTDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 2
+       // result: (CondSelect <t> x y (MOVWQZX <typ.UInt64> check))
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               check := v_2
+               if !(!check.Type.IsFlags() && check.Type.Size() == 2) {
+                       break
+               }
+               v.reset(OpCondSelect)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt64)
+               v0.AddArg(check)
+               v.AddArg3(x, y, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedUint32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint32x4 x mask)
-       // result: (VPOPCNTDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 4
+       // result: (CondSelect <t> x y (MOVLQZX <typ.UInt64> check))
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               check := v_2
+               if !(!check.Type.IsFlags() && check.Type.Size() == 4) {
+                       break
+               }
+               v.reset(OpCondSelect)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLQZX, typ.UInt64)
+               v0.AddArg(check)
+               v.AddArg3(x, y, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedUint32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint32x8 x mask)
-       // result: (VPOPCNTDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))
+       // result: (CMOVQNE y x (CMPQconst [0] check))
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               check := v_2
+               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && (is64BitInt(t) || isPtr(t))) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v0.AuxInt = int32ToAuxInt(0)
+               v0.AddArg(check)
+               v.AddArg3(y, x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedUint64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint64x2 x mask)
-       // result: (VPOPCNTQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)
+       // result: (CMOVLNE y x (CMPQconst [0] check))
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               check := v_2
+               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVLNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v0.AuxInt = int32ToAuxInt(0)
+               v0.AddArg(check)
+               v.AddArg3(y, x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpOnesCountMaskedUint64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint64x4 x mask)
-       // result: (VPOPCNTQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CondSelect <t> x y check)
+       // cond: !check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)
+       // result: (CMOVWNE y x (CMPQconst [0] check))
        for {
+               t := v.Type
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               check := v_2
+               if !(!check.Type.IsFlags() && check.Type.Size() == 8 && is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64CMOVWNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v0.AuxInt = int32ToAuxInt(0)
+               v0.AddArg(check)
+               v.AddArg3(y, x, v0)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpOnesCountMaskedUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint64x8 x mask)
-       // result: (VPOPCNTQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+func rewriteValueAMD64_OpConst16(v *Value) bool {
+       // match: (Const16 [c])
+       // result: (MOVLconst [int32(c)])
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               c := auxIntToInt16(v.AuxInt)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int32ToAuxInt(int32(c))
                return true
        }
 }
-func rewriteValueAMD64_OpOnesCountMaskedUint8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint8x16 x mask)
-       // result: (VPOPCNTBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
+func rewriteValueAMD64_OpConst8(v *Value) bool {
+       // match: (Const8 [c])
+       // result: (MOVLconst [int32(c)])
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               c := auxIntToInt8(v.AuxInt)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int32ToAuxInt(int32(c))
                return true
        }
 }
-func rewriteValueAMD64_OpOnesCountMaskedUint8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint8x32 x mask)
-       // result: (VPOPCNTBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
+func rewriteValueAMD64_OpConstBool(v *Value) bool {
+       // match: (ConstBool [c])
+       // result: (MOVLconst [b2i32(c)])
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               c := auxIntToBool(v.AuxInt)
+               v.reset(OpAMD64MOVLconst)
+               v.AuxInt = int32ToAuxInt(b2i32(c))
                return true
        }
 }
-func rewriteValueAMD64_OpOnesCountMaskedUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OnesCountMaskedUint8x64 x mask)
-       // result: (VPOPCNTBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
+func rewriteValueAMD64_OpConstNil(v *Value) bool {
+       // match: (ConstNil )
+       // result: (MOVQconst [0])
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPOPCNTBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64MOVQconst)
+               v.AuxInt = int64ToAuxInt(0)
                return true
        }
 }
-func rewriteValueAMD64_OpOrMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCtz16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (OrMaskedInt32x16 x y mask)
-       // result: (VPORDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Ctz16 x)
+       // result: (BSFL (ORLconst <typ.UInt32> [1<<16] x))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64BSFL)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(1 << 16)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpOrMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCtz16NonZero(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (OrMaskedInt32x4 x y mask)
-       // result: (VPORDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Ctz16NonZero x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (TZCNTL x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64TZCNTL)
+               v.AddArg(x)
                return true
        }
-}
-func rewriteValueAMD64_OpOrMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OrMaskedInt32x8 x y mask)
-       // result: (VPORDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Ctz16NonZero x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (BSFL x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpAMD64BSFL)
+               v.AddArg(x)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpOrMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCtz32(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (OrMaskedInt64x2 x y mask)
-       // result: (VPORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Ctz32 x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (TZCNTL x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64TZCNTL)
+               v.AddArg(x)
                return true
        }
-}
-func rewriteValueAMD64_OpOrMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OrMaskedInt64x4 x y mask)
-       // result: (VPORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Ctz32 x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (Select0 (BSFQ (BTSQconst <typ.UInt64> [32] x)))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v1 := b.NewValue0(v.Pos, OpAMD64BTSQconst, typ.UInt64)
+               v1.AuxInt = int8ToAuxInt(32)
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpOrMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCtz32NonZero(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (OrMaskedInt64x8 x y mask)
-       // result: (VPORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Ctz32NonZero x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (TZCNTL x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64TZCNTL)
+               v.AddArg(x)
                return true
        }
-}
-func rewriteValueAMD64_OpOrMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OrMaskedUint32x16 x y mask)
-       // result: (VPORDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Ctz32NonZero x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (BSFL x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpAMD64BSFL)
+               v.AddArg(x)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpOrMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCtz64(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (OrMaskedUint32x4 x y mask)
-       // result: (VPORDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Ctz64 x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (TZCNTQ x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64TZCNTQ)
+               v.AddArg(x)
                return true
        }
-}
-func rewriteValueAMD64_OpOrMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OrMaskedUint32x8 x y mask)
-       // result: (VPORDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Ctz64 <t> x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <types.TypeFlags> (BSFQ x)))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpAMD64CMOVQEQ)
+               v0 := b.NewValue0(v.Pos, OpSelect0, t)
+               v1 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v1.AddArg(x)
+               v0.AddArg(v1)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVQconst, t)
+               v2.AuxInt = int64ToAuxInt(64)
+               v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v3.AddArg(v1)
+               v.AddArg3(v0, v2, v3)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpOrMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCtz64NonZero(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (OrMaskedUint64x2 x y mask)
-       // result: (VPORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Ctz64NonZero x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (TZCNTQ x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64TZCNTQ)
+               v.AddArg(x)
                return true
        }
-}
-func rewriteValueAMD64_OpOrMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (OrMaskedUint64x4 x y mask)
-       // result: (VPORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Ctz64NonZero x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (Select0 (BSFQ x))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64BSFQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpOrMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCtz8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (OrMaskedUint64x8 x y mask)
-       // result: (VPORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Ctz8 x)
+       // result: (BSFL (ORLconst <typ.UInt32> [1<<8 ] x))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPORQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64BSFL)
+               v0 := b.NewValue0(v.Pos, OpAMD64ORLconst, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(1 << 8)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedFloat32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCtz8NonZero(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (Permute2MaskedFloat32x16 x y z mask)
-       // result: (VPERMI2PSMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Ctz8NonZero x)
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (TZCNTL x)
        for {
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2PSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(buildcfg.GOAMD64 >= 3) {
+                       break
+               }
+               v.reset(OpAMD64TZCNTL)
+               v.AddArg(x)
                return true
        }
-}
-func rewriteValueAMD64_OpPermute2MaskedFloat32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Permute2MaskedFloat32x4 x y z mask)
-       // result: (VPERMI2PSMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Ctz8NonZero x)
+       // cond: buildcfg.GOAMD64 < 3
+       // result: (BSFL x)
        for {
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2PSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(buildcfg.GOAMD64 < 3) {
+                       break
+               }
+               v.reset(OpAMD64BSFL)
+               v.AddArg(x)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpPermute2MaskedFloat32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt16toMask16x16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedFloat32x8 x y z mask)
-       // result: (VPERMI2PSMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Cvt16toMask16x16 <t> x)
+       // result: (VPMOVMToVec16x16 <types.TypeVec256> (KMOVWk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2PSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec16x16)
+               v.Type = types.TypeVec256
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedFloat64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt16toMask32x16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedFloat64x2 x y z mask)
-       // result: (VPERMI2PDMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Cvt16toMask32x16 <t> x)
+       // result: (VPMOVMToVec32x16 <types.TypeVec512> (KMOVWk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2PDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v.Type = types.TypeVec512
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedFloat64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt16toMask8x16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedFloat64x4 x y z mask)
-       // result: (VPERMI2PDMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Cvt16toMask8x16 <t> x)
+       // result: (VPMOVMToVec8x16 <types.TypeVec128> (KMOVWk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2PDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec8x16)
+               v.Type = types.TypeVec128
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedFloat64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt32toMask16x32(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedFloat64x8 x y z mask)
-       // result: (VPERMI2PDMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Cvt32toMask16x32 <t> x)
+       // result: (VPMOVMToVec16x32 <types.TypeVec512> (KMOVDk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2PDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v.Type = types.TypeVec512
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVDk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt16x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt32toMask8x32(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt16x16 x y z mask)
-       // result: (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Cvt32toMask8x32 <t> x)
+       // result: (VPMOVMToVec8x32 <types.TypeVec256> (KMOVDk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2WMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec8x32)
+               v.Type = types.TypeVec256
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVDk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt16x32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt64toMask8x64(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt16x32 x y z mask)
-       // result: (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Cvt64toMask8x64 <t> x)
+       // result: (VPMOVMToVec8x64 <types.TypeVec512> (KMOVQk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2WMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v.Type = types.TypeVec512
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt16x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt8toMask16x8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt16x8 x y z mask)
-       // result: (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Cvt8toMask16x8 <t> x)
+       // result: (VPMOVMToVec16x8 <types.TypeVec128> (KMOVBk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2WMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec16x8)
+               v.Type = types.TypeVec128
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt8toMask32x4(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt32x16 x y z mask)
-       // result: (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Cvt8toMask32x4 <t> x)
+       // result: (VPMOVMToVec32x4 <types.TypeVec128> (KMOVBk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2DMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec32x4)
+               v.Type = types.TypeVec128
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt8toMask32x8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt32x4 x y z mask)
-       // result: (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Cvt8toMask32x8 <t> x)
+       // result: (VPMOVMToVec32x8 <types.TypeVec256> (KMOVBk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2DMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec32x8)
+               v.Type = types.TypeVec256
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt8toMask64x2(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt32x8 x y z mask)
-       // result: (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Cvt8toMask64x2 <t> x)
+       // result: (VPMOVMToVec64x2 <types.TypeVec128> (KMOVBk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2DMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec64x2)
+               v.Type = types.TypeVec128
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt8toMask64x4(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt64x2 x y z mask)
-       // result: (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Cvt8toMask64x4 <t> x)
+       // result: (VPMOVMToVec64x4 <types.TypeVec256> (KMOVBk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2QMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec64x4)
+               v.Type = types.TypeVec256
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvt8toMask64x8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt64x4 x y z mask)
-       // result: (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Cvt8toMask64x8 <t> x)
+       // result: (VPMOVMToVec64x8 <types.TypeVec512> (KMOVBk <t> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2QMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v.Type = types.TypeVec512
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVBk, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask16x16to16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt64x8 x y z mask)
-       // result: (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (CvtMask16x16to16 <t> x)
+       // result: (KMOVWi <t> (VPMOVVec16x16ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2QMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVWi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt8x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask16x32to32(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt8x16 x y z mask)
-       // result: (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (CvtMask16x32to32 <t> x)
+       // result: (KMOVDi <t> (VPMOVVec16x32ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2BMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVDi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt8x32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask16x8to8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt8x32 x y z mask)
-       // result: (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (CvtMask16x8to8 <t> x)
+       // result: (KMOVBi <t> (VPMOVVec16x8ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2BMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVBi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedInt8x64(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask32x16to16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedInt8x64 x y z mask)
-       // result: (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (CvtMask32x16to16 <t> x)
+       // result: (KMOVWi <t> (VPMOVVec32x16ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2BMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVWi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint16x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask32x4to8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint16x16 x y z mask)
-       // result: (VPERMI2WMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (CvtMask32x4to8 <t> x)
+       // result: (KMOVBi <t> (VPMOVVec32x4ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2WMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVBi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint16x32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask32x8to8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint16x32 x y z mask)
-       // result: (VPERMI2WMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (CvtMask32x8to8 <t> x)
+       // result: (KMOVBi <t> (VPMOVVec32x8ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2WMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVBi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint16x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask64x2to8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint16x8 x y z mask)
-       // result: (VPERMI2WMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (CvtMask64x2to8 <t> x)
+       // result: (KMOVBi <t> (VPMOVVec64x2ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2WMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVBi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask64x4to8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint32x16 x y z mask)
-       // result: (VPERMI2DMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (CvtMask64x4to8 <t> x)
+       // result: (KMOVBi <t> (VPMOVVec64x4ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2DMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVBi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask64x8to8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint32x4 x y z mask)
-       // result: (VPERMI2DMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (CvtMask64x8to8 <t> x)
+       // result: (KMOVBi <t> (VPMOVVec64x8ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2DMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVBi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask8x16to16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint32x8 x y z mask)
-       // result: (VPERMI2DMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (CvtMask8x16to16 <t> x)
+       // result: (KMOVWi <t> (VPMOVVec8x16ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2DMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVWi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask8x32to32(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint64x2 x y z mask)
-       // result: (VPERMI2QMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (CvtMask8x32to32 <t> x)
+       // result: (KMOVDi <t> (VPMOVVec8x32ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2QMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVDi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpCvtMask8x64to64(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint64x4 x y z mask)
-       // result: (VPERMI2QMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (CvtMask8x64to64 <t> x)
+       // result: (KMOVQi <t> (VPMOVVec8x64ToM <types.TypeMask> x))
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2QMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64KMOVQi)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpDiv16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint64x8 x y z mask)
-       // result: (VPERMI2QMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Div16 [a] x y)
+       // result: (Select0 (DIVW [a] x y))
        for {
+               a := auxIntToBool(v.AuxInt)
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2QMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16))
+               v0.AuxInt = boolToAuxInt(a)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint8x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpDiv16u(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint8x16 x y z mask)
-       // result: (VPERMI2BMasked128 x y z (VPMOVVec8x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Div16u x y)
+       // result: (Select0 (DIVWU x y))
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2BMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint8x32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpDiv32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint8x32 x y z mask)
-       // result: (VPERMI2BMasked256 x y z (VPMOVVec8x32ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Div32 [a] x y)
+       // result: (Select0 (DIVL [a] x y))
        for {
+               a := auxIntToBool(v.AuxInt)
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2BMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVL, types.NewTuple(typ.Int32, typ.Int32))
+               v0.AuxInt = boolToAuxInt(a)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermute2MaskedUint8x64(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpDiv32u(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Permute2MaskedUint8x64 x y z mask)
-       // result: (VPERMI2BMasked512 x y z (VPMOVVec8x64ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Div32u x y)
+       // result: (Select0 (DIVLU x y))
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPERMI2BMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVLU, types.NewTuple(typ.UInt32, typ.UInt32))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpDiv64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedFloat32x16 x y mask)
-       // result: (VPERMPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Div64 [a] x y)
+       // result: (Select0 (DIVQ [a] x y))
        for {
+               a := auxIntToBool(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVQ, types.NewTuple(typ.Int64, typ.Int64))
+               v0.AuxInt = boolToAuxInt(a)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpDiv64u(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedFloat32x8 x y mask)
-       // result: (VPERMPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Div64u x y)
+       // result: (Select0 (DIVQU x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVQU, types.NewTuple(typ.UInt64, typ.UInt64))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpDiv8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedFloat64x4 x y mask)
-       // result: (VPERMPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Div8 x y)
+       // result: (Select0 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16))
+               v1 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16)
+               v2.AddArg(y)
+               v0.AddArg2(v1, v2)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpDiv8u(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedFloat64x8 x y mask)
-       // result: (VPERMPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Div8u x y)
+       // result: (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16))
+               v1 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16)
+               v2.AddArg(y)
+               v0.AddArg2(v1, v2)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEq16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt16x16 x y mask)
-       // result: (VPERMWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Eq16 x y)
+       // result: (SETEQ (CMPW x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETEQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEq32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt16x32 x y mask)
-       // result: (VPERMWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Eq32 x y)
+       // result: (SETEQ (CMPL x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETEQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEq32F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt16x8 x y mask)
-       // result: (VPERMWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Eq32F x y)
+       // result: (SETEQF (UCOMISS x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETEQF)
+               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEq64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt32x16 x y mask)
-       // result: (VPERMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Eq64 x y)
+       // result: (SETEQ (CMPQ x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETEQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEq64F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt32x8 x y mask)
-       // result: (VPERMDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Eq64F x y)
+       // result: (SETEQF (UCOMISD x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETEQF)
+               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEq8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt64x4 x y mask)
-       // result: (VPERMQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Eq8 x y)
+       // result: (SETEQ (CMPB x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETEQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqB(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt64x8 x y mask)
-       // result: (VPERMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (EqB x y)
+       // result: (SETEQ (CMPB x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETEQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqPtr(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt8x16 x y mask)
-       // result: (VPERMBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (EqPtr x y)
+       // result: (SETEQ (CMPQ x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETEQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedInt8x32 x y mask)
-       // result: (VPERMBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (EqualFloat32x16 x y)
+       // result: (VPMOVMToVec32x16 (VCMPPS512 [0] x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(0)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualFloat32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (PermuteMaskedInt8x64 x y mask)
-       // result: (VPERMBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (EqualFloat32x4 x y)
+       // result: (VCMPPS128 [0] x y)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               y := v_1
+               v.reset(OpAMD64VCMPPS128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (PermuteMaskedUint16x16 x y mask)
-       // result: (VPERMWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (EqualFloat32x8 x y)
+       // result: (VCMPPS256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPS256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (PermuteMaskedUint16x32 x y mask)
-       // result: (VPERMWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (EqualFloat64x2 x y)
+       // result: (VCMPPD128 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPD128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (PermuteMaskedUint16x8 x y mask)
-       // result: (VPERMWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (EqualFloat64x4 x y)
+       // result: (VCMPPD256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPD256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedUint32x16 x y mask)
-       // result: (VPERMDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (EqualFloat64x8 x y)
+       // result: (VPMOVMToVec64x8 (VCMPPD512 [0] x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(0)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedUint32x8 x y mask)
-       // result: (VPERMDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (EqualInt16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPEQW512 x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQW512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedUint64x4 x y mask)
-       // result: (VPERMQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (EqualInt32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPEQD512 x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQD512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedUint64x8 x y mask)
-       // result: (VPERMQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (EqualInt64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQ512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedUint8x16 x y mask)
-       // result: (VPERMBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (EqualInt8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPEQB512 x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQB512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint8x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedUint8x32 x y mask)
-       // result: (VPERMBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (EqualUint16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPEQW512 x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQW512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPermuteMaskedUint8x64(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpEqualUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (PermuteMaskedUint8x64 x y mask)
-       // result: (VPERMBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (EqualUint32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPEQD512 x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPERMBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQD512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPopCount16(v *Value) bool {
+func rewriteValueAMD64_OpEqualUint64x8(v *Value) bool {
+       v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (PopCount16 x)
-       // result: (POPCNTL (MOVWQZX <typ.UInt32> x))
+       // match: (EqualUint64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
        for {
                x := v_0
-               v.reset(OpAMD64POPCNTL)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt32)
-               v0.AddArg(x)
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQ512, typ.Mask)
+               v0.AddArg2(x, y)
                v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpPopCount8(v *Value) bool {
+func rewriteValueAMD64_OpEqualUint8x64(v *Value) bool {
+       v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (PopCount8 x)
-       // result: (POPCNTL (MOVBQZX <typ.UInt32> x))
+       // match: (EqualUint8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPEQB512 x y))
        for {
                x := v_0
-               v.reset(OpAMD64POPCNTL)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt32)
-               v0.AddArg(x)
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQB512, typ.Mask)
+               v0.AddArg2(x, y)
                v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalMaskedFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpExpandFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalMaskedFloat32x16 x mask)
-       // result: (VRCP14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (ExpandFloat32x16 x mask)
+       // result: (VEXPANDPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRCP14PSMasked512)
+               v.reset(OpAMD64VEXPANDPSMasked512)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalMaskedFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandFloat32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalMaskedFloat32x4 x mask)
-       // result: (VRCP14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (ExpandFloat32x4 x mask)
+       // result: (VEXPANDPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRCP14PSMasked128)
+               v.reset(OpAMD64VEXPANDPSMasked128)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalMaskedFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalMaskedFloat32x8 x mask)
-       // result: (VRCP14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (ExpandFloat32x8 x mask)
+       // result: (VEXPANDPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRCP14PSMasked256)
+               v.reset(OpAMD64VEXPANDPSMasked256)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalMaskedFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpExpandFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalMaskedFloat64x2 x mask)
-       // result: (VRCP14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (ExpandFloat64x2 x mask)
+       // result: (VEXPANDPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRCP14PDMasked128)
+               v.reset(OpAMD64VEXPANDPDMasked128)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalMaskedFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalMaskedFloat64x4 x mask)
-       // result: (VRCP14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (ExpandFloat64x4 x mask)
+       // result: (VEXPANDPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRCP14PDMasked256)
+               v.reset(OpAMD64VEXPANDPDMasked256)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalMaskedFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalMaskedFloat64x8 x mask)
-       // result: (VRCP14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (ExpandFloat64x8 x mask)
+       // result: (VEXPANDPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRCP14PDMasked512)
+               v.reset(OpAMD64VEXPANDPDMasked512)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalSqrtMaskedFloat32x16 x mask)
-       // result: (VRSQRT14PSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (ExpandInt16x16 x mask)
+       // result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRSQRT14PSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDWMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalSqrtMaskedFloat32x4 x mask)
-       // result: (VRSQRT14PSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (ExpandInt16x32 x mask)
+       // result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRSQRT14PSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDWMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalSqrtMaskedFloat32x8 x mask)
-       // result: (VRSQRT14PSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (ExpandInt16x8 x mask)
+       // result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRSQRT14PSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDWMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalSqrtMaskedFloat64x2 x mask)
-       // result: (VRSQRT14PDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (ExpandInt32x16 x mask)
+       // result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRSQRT14PDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDDMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalSqrtMaskedFloat64x4 x mask)
-       // result: (VRSQRT14PDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (ExpandInt32x4 x mask)
+       // result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRSQRT14PDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDDMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpReciprocalSqrtMaskedFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ReciprocalSqrtMaskedFloat64x8 x mask)
-       // result: (VRSQRT14PDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (ExpandInt32x8 x mask)
+       // result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VRSQRT14PDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDDMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedInt32x16 [a] x mask)
-       // result: (VPROLDMasked512 [a] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (ExpandInt64x2 x mask)
+       // result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDQMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedInt32x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedInt32x4 [a] x mask)
-       // result: (VPROLDMasked128 [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (ExpandInt64x4 x mask)
+       // result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDQMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedInt32x8 [a] x mask)
-       // result: (VPROLDMasked256 [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (ExpandInt64x8 x mask)
+       // result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDQMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedInt64x2(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt8x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedInt64x2 [a] x mask)
-       // result: (VPROLQMasked128 [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (ExpandInt8x16 x mask)
+       // result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDBMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedInt64x4 [a] x mask)
-       // result: (VPROLQMasked256 [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (ExpandInt8x32 x mask)
+       // result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDBMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedInt64x8 [a] x mask)
-       // result: (VPROLQMasked512 [a] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (ExpandInt8x64 x mask)
+       // result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDBMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedUint32x16(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedUint32x16 [a] x mask)
-       // result: (VPROLDMasked512 [a] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (ExpandUint16x16 x mask)
+       // result: (VPEXPANDWMasked256 x (VPMOVVec16x16ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDWMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedUint32x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedUint32x4 [a] x mask)
-       // result: (VPROLDMasked128 [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (ExpandUint16x32 x mask)
+       // result: (VPEXPANDWMasked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDWMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedUint32x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedUint32x8 [a] x mask)
-       // result: (VPROLDMasked256 [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (ExpandUint16x8 x mask)
+       // result: (VPEXPANDWMasked128 x (VPMOVVec16x8ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDWMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedUint64x2(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedUint64x2 [a] x mask)
-       // result: (VPROLQMasked128 [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (ExpandUint32x16 x mask)
+       // result: (VPEXPANDDMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDDMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedUint64x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedUint64x4 [a] x mask)
-       // result: (VPROLQMasked256 [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (ExpandUint32x4 x mask)
+       // result: (VPEXPANDDMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDDMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllLeftMaskedUint64x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllLeftMaskedUint64x8 [a] x mask)
-       // result: (VPROLQMasked512 [a] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (ExpandUint32x8 x mask)
+       // result: (VPEXPANDDMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPROLQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDDMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllRightMaskedInt32x16 [a] x mask)
-       // result: (VPRORDMasked512 [a] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (ExpandUint64x2 x mask)
+       // result: (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPRORDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDQMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedInt32x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllRightMaskedInt32x4 [a] x mask)
-       // result: (VPRORDMasked128 [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (ExpandUint64x4 x mask)
+       // result: (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPRORDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDQMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllRightMaskedInt32x8 [a] x mask)
-       // result: (VPRORDMasked256 [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (ExpandUint64x8 x mask)
+       // result: (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPRORDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDQMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedInt64x2(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint8x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllRightMaskedInt64x2 [a] x mask)
-       // result: (VPRORQMasked128 [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (ExpandUint8x16 x mask)
+       // result: (VPEXPANDBMasked128 x (VPMOVVec8x16ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPRORQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDBMasked128)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllRightMaskedInt64x4 [a] x mask)
-       // result: (VPRORQMasked256 [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (ExpandUint8x32 x mask)
+       // result: (VPEXPANDBMasked256 x (VPMOVVec8x32ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPRORQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDBMasked256)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpExpandUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (RotateAllRightMaskedInt64x8 [a] x mask)
-       // result: (VPRORQMasked512 [a] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (ExpandUint8x64 x mask)
+       // result: (VPEXPANDBMasked512 x (VPMOVVec8x64ToM <types.TypeMask> mask))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                mask := v_1
-               v.reset(OpAMD64VPRORQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v.reset(OpAMD64VPEXPANDBMasked512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
                v0.AddArg(mask)
                v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedUint32x16(v *Value) bool {
+func rewriteValueAMD64_OpFMA(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateAllRightMaskedUint32x16 [a] x mask)
-       // result: (VPRORDMasked512 [a] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (FMA x y z)
+       // result: (VFMADD231SD z x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPRORDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               y := v_1
+               z := v_2
+               v.reset(OpAMD64VFMADD231SD)
+               v.AddArg3(z, x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedUint32x4(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloor(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateAllRightMaskedUint32x4 [a] x mask)
-       // result: (VPRORDMasked128 [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Floor x)
+       // result: (ROUNDSD [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPRORDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64ROUNDSD)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedUint32x8(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateAllRightMaskedUint32x8 [a] x mask)
-       // result: (VPRORDMasked256 [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (FloorFloat32x4 x)
+       // result: (VROUNDPS128 [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPRORDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VROUNDPS128)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedUint64x2(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateAllRightMaskedUint64x2 [a] x mask)
-       // result: (VPRORQMasked128 [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (FloorFloat32x8 x)
+       // result: (VROUNDPS256 [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPRORQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VROUNDPS256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedUint64x4(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorFloat64x2(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateAllRightMaskedUint64x4 [a] x mask)
-       // result: (VPRORQMasked256 [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (FloorFloat64x2 x)
+       // result: (VROUNDPD128 [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPRORQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VROUNDPD128)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateAllRightMaskedUint64x8(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateAllRightMaskedUint64x8 [a] x mask)
-       // result: (VPRORQMasked512 [a] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (FloorFloat64x4 x)
+       // result: (VROUNDPD256 [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VPRORQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VROUNDPD256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedInt32x16 x y mask)
-       // result: (VPROLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (FloorScaledFloat32x16 [a] x)
+       // result: (VRNDSCALEPS512 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS512)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedInt32x4 x y mask)
-       // result: (VPROLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (FloorScaledFloat32x4 [a] x)
+       // result: (VRNDSCALEPS128 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS128)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedInt32x8 x y mask)
-       // result: (VPROLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (FloorScaledFloat32x8 [a] x)
+       // result: (VRNDSCALEPS256 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS256)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledFloat64x2(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedInt64x2 x y mask)
-       // result: (VPROLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (FloorScaledFloat64x2 [a] x)
+       // result: (VRNDSCALEPD128 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD128)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedInt64x4 x y mask)
-       // result: (VPROLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (FloorScaledFloat64x4 [a] x)
+       // result: (VRNDSCALEPD256 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD256)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledFloat64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedInt64x8 x y mask)
-       // result: (VPROLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (FloorScaledFloat64x8 [a] x)
+       // result: (VRNDSCALEPD512 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD512)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledResidueFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedUint32x16 x y mask)
-       // result: (VPROLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (FloorScaledResidueFloat32x16 [a] x)
+       // result: (VREDUCEPS512 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VREDUCEPS512)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledResidueFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedUint32x4 x y mask)
-       // result: (VPROLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (FloorScaledResidueFloat32x4 [a] x)
+       // result: (VREDUCEPS128 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VREDUCEPS128)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledResidueFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedUint32x8 x y mask)
-       // result: (VPROLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (FloorScaledResidueFloat32x8 [a] x)
+       // result: (VREDUCEPS256 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VREDUCEPS256)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledResidueFloat64x2(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedUint64x2 x y mask)
-       // result: (VPROLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (FloorScaledResidueFloat64x2 [a] x)
+       // result: (VREDUCEPD128 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VREDUCEPD128)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledResidueFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedUint64x4 x y mask)
-       // result: (VPROLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (FloorScaledResidueFloat64x4 [a] x)
+       // result: (VREDUCEPD256 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VREDUCEPD256)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateLeftMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpFloorScaledResidueFloat64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateLeftMaskedUint64x8 x y mask)
-       // result: (VPROLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (FloorScaledResidueFloat64x8 [a] x)
+       // result: (VREDUCEPD512 [a+1] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPROLVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VREDUCEPD512)
+               v.AuxInt = uint8ToAuxInt(a + 1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetG(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedInt32x16 x y mask)
-       // result: (VPRORVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (GetG mem)
+       // cond: v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal
+       // result: (LoweredGetG mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               mem := v_0
+               if !(v.Block.Func.OwnAux.Fn.ABI() != obj.ABIInternal) {
+                       break
+               }
+               v.reset(OpAMD64LoweredGetG)
+               v.AddArg(mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpRotateRightMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedInt32x4 x y mask)
-       // result: (VPRORVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (GetHiFloat32x16 x)
+       // result: (VEXTRACTF64X4256 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTF64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedInt32x8 x y mask)
-       // result: (VPRORVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (GetHiFloat32x8 x)
+       // result: (VEXTRACTF128128 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTF128128)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedInt64x2 x y mask)
-       // result: (VPRORVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (GetHiFloat64x4 x)
+       // result: (VEXTRACTF128128 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTF128128)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiFloat64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedInt64x4 x y mask)
-       // result: (VPRORVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (GetHiFloat64x8 x)
+       // result: (VEXTRACTF64X4256 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTF64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiInt16x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedInt64x8 x y mask)
-       // result: (VPRORVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (GetHiInt16x16 x)
+       // result: (VEXTRACTI128128 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiInt16x32(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedUint32x16 x y mask)
-       // result: (VPRORVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (GetHiInt16x32 x)
+       // result: (VEXTRACTI64X4256 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiInt32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedUint32x4 x y mask)
-       // result: (VPRORVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (GetHiInt32x16 x)
+       // result: (VEXTRACTI64X4256 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiInt32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedUint32x8 x y mask)
-       // result: (VPRORVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (GetHiInt32x8 x)
+       // result: (VEXTRACTI128128 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiInt64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedUint64x2 x y mask)
-       // result: (VPRORVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (GetHiInt64x4 x)
+       // result: (VEXTRACTI128128 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiInt64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedUint64x4 x y mask)
-       // result: (VPRORVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (GetHiInt64x8 x)
+       // result: (VEXTRACTI64X4256 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRotateRightMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetHiInt8x32(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RotateRightMaskedUint64x8 x y mask)
-       // result: (VPRORVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (GetHiInt8x32 x)
+       // result: (VEXTRACTI128128 [1] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPRORVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEven(v *Value) bool {
+func rewriteValueAMD64_OpGetHiInt8x64(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEven x)
-       // result: (ROUNDSD [0] x)
+       // match: (GetHiInt8x64 x)
+       // result: (VEXTRACTI64X4256 [1] x)
        for {
                x := v_0
-               v.reset(OpAMD64ROUNDSD)
-               v.AuxInt = int8ToAuxInt(0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpGetHiUint16x16(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenFloat32x4 x)
-       // result: (VROUNDPS128 [0] x)
+       // match: (GetHiUint16x16 x)
+       // result: (VEXTRACTI128128 [1] x)
        for {
                x := v_0
-               v.reset(OpAMD64VROUNDPS128)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpGetHiUint16x32(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenFloat32x8 x)
-       // result: (VROUNDPS256 [0] x)
+       // match: (GetHiUint16x32 x)
+       // result: (VEXTRACTI64X4256 [1] x)
        for {
                x := v_0
-               v.reset(OpAMD64VROUNDPS256)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpGetHiUint32x16(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenFloat64x2 x)
-       // result: (VROUNDPD128 [0] x)
+       // match: (GetHiUint32x16 x)
+       // result: (VEXTRACTI64X4256 [1] x)
        for {
                x := v_0
-               v.reset(OpAMD64VROUNDPD128)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpGetHiUint32x8(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenFloat64x4 x)
-       // result: (VROUNDPD256 [0] x)
+       // match: (GetHiUint32x8 x)
+       // result: (VEXTRACTI128128 [1] x)
        for {
                x := v_0
-               v.reset(OpAMD64VROUNDPD256)
-               v.AuxInt = uint8ToAuxInt(0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpGetHiUint64x4(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledFloat32x16 [a] x)
-       // result: (VRNDSCALEPS512 [a+0] x)
+       // match: (GetHiUint64x4 x)
+       // result: (VEXTRACTI128128 [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VRNDSCALEPS512)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpGetHiUint64x8(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledFloat32x4 [a] x)
-       // result: (VRNDSCALEPS128 [a+0] x)
+       // match: (GetHiUint64x8 x)
+       // result: (VEXTRACTI64X4256 [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VRNDSCALEPS128)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpGetHiUint8x32(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledFloat32x8 [a] x)
-       // result: (VRNDSCALEPS256 [a+0] x)
+       // match: (GetHiUint8x32 x)
+       // result: (VEXTRACTI128128 [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VRNDSCALEPS256)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpGetHiUint8x64(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledFloat64x2 [a] x)
-       // result: (VRNDSCALEPD128 [a+0] x)
+       // match: (GetHiUint8x64 x)
+       // result: (VEXTRACTI64X4256 [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VRNDSCALEPD128)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(1)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpGetLoFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledFloat64x4 [a] x)
-       // result: (VRNDSCALEPD256 [a+0] x)
+       // match: (GetLoFloat32x16 x)
+       // result: (VEXTRACTF64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VRNDSCALEPD256)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTF64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpGetLoFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledFloat64x8 [a] x)
-       // result: (VRNDSCALEPD512 [a+0] x)
+       // match: (GetLoFloat32x8 x)
+       // result: (VEXTRACTF128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VRNDSCALEPD512)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTF128128)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledMaskedFloat32x16 [a] x mask)
-       // result: (VRNDSCALEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (GetLoFloat64x4 x)
+       // result: (VEXTRACTF128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTF128128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoFloat64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledMaskedFloat32x4 [a] x mask)
-       // result: (VRNDSCALEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (GetLoFloat64x8 x)
+       // result: (VEXTRACTF64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTF64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoInt16x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledMaskedFloat32x8 [a] x mask)
-       // result: (VRNDSCALEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (GetLoInt16x16 x)
+       // result: (VEXTRACTI128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoInt16x32(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledMaskedFloat64x2 [a] x mask)
-       // result: (VRNDSCALEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (GetLoInt16x32 x)
+       // result: (VEXTRACTI64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoInt32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledMaskedFloat64x4 [a] x mask)
-       // result: (VRNDSCALEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (GetLoInt32x16 x)
+       // result: (VEXTRACTI64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoInt32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledMaskedFloat64x8 [a] x mask)
-       // result: (VRNDSCALEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (GetLoInt32x8 x)
+       // result: (VEXTRACTI128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpGetLoInt64x4(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledResidueFloat32x16 [a] x)
-       // result: (VREDUCEPS512 [a+0] x)
+       // match: (GetLoInt64x4 x)
+       // result: (VEXTRACTI128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VREDUCEPS512)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x4(v *Value) bool {
+func rewriteValueAMD64_OpGetLoInt64x8(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledResidueFloat32x4 [a] x)
-       // result: (VREDUCEPS128 [a+0] x)
+       // match: (GetLoInt64x8 x)
+       // result: (VEXTRACTI64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VREDUCEPS128)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpGetLoInt8x32(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledResidueFloat32x8 [a] x)
-       // result: (VREDUCEPS256 [a+0] x)
+       // match: (GetLoInt8x32 x)
+       // result: (VEXTRACTI128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VREDUCEPS256)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x2(v *Value) bool {
+func rewriteValueAMD64_OpGetLoInt8x64(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledResidueFloat64x2 [a] x)
-       // result: (VREDUCEPD128 [a+0] x)
+       // match: (GetLoInt8x64 x)
+       // result: (VEXTRACTI64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VREDUCEPD128)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpGetLoUint16x16(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledResidueFloat64x4 [a] x)
-       // result: (VREDUCEPD256 [a+0] x)
+       // match: (GetLoUint16x16 x)
+       // result: (VEXTRACTI128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VREDUCEPD256)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpGetLoUint16x32(v *Value) bool {
        v_0 := v.Args[0]
-       // match: (RoundToEvenScaledResidueFloat64x8 [a] x)
-       // result: (VREDUCEPD512 [a+0] x)
+       // match: (GetLoUint16x32 x)
+       // result: (VEXTRACTI64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               v.reset(OpAMD64VREDUCEPD512)
-               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
                v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoUint32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledResidueMaskedFloat32x16 [a] x mask)
-       // result: (VREDUCEPSMasked512 [a+0] x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (GetLoUint32x16 x)
+       // result: (VEXTRACTI64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoUint32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledResidueMaskedFloat32x4 [a] x mask)
-       // result: (VREDUCEPSMasked128 [a+0] x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (GetLoUint32x8 x)
+       // result: (VEXTRACTI128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoUint64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledResidueMaskedFloat32x8 [a] x mask)
-       // result: (VREDUCEPSMasked256 [a+0] x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (GetLoUint64x4 x)
+       // result: (VEXTRACTI128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoUint64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledResidueMaskedFloat64x2 [a] x mask)
-       // result: (VREDUCEPDMasked128 [a+0] x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (GetLoUint64x8 x)
+       // result: (VEXTRACTI64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoUint8x32(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledResidueMaskedFloat64x4 [a] x mask)
-       // result: (VREDUCEPDMasked256 [a+0] x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (GetLoUint8x32 x)
+       // result: (VEXTRACTI128128 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRoundToEvenScaledResidueMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpGetLoUint8x64(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (RoundToEvenScaledResidueMaskedFloat64x8 [a] x mask)
-       // result: (VREDUCEPDMasked512 [a+0] x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (GetLoUint8x64 x)
+       // result: (VEXTRACTI64X4256 [0] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 0)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpRsh16Ux16(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh16Ux16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPWconst y [16])))
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualFloat32x16 x y)
+       // result: (VPMOVMToVec32x16 (VCMPPS512 [13] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v2.AuxInt = int16ToAuxInt(16)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh16Ux16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRW x y)
+}
+func rewriteValueAMD64_OpGreaterEqualFloat32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (GreaterEqualFloat32x4 x y)
+       // result: (VCMPPS128 [13] x y)
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRW)
+               v.reset(OpAMD64VCMPPS128)
+               v.AuxInt = uint8ToAuxInt(13)
                v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh16Ux32(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (Rsh16Ux32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPLconst y [16])))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(16)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Rsh16Ux32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRW x y)
+       // match: (GreaterEqualFloat32x8 x y)
+       // result: (VCMPPS256 [13] x y)
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRW)
+               v.reset(OpAMD64VCMPPS256)
+               v.AuxInt = uint8ToAuxInt(13)
                v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh16Ux64(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (Rsh16Ux64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPQconst y [16])))
+       // match: (GreaterEqualFloat64x2 x y)
+       // result: (VCMPPD128 [13] x y)
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(16)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.reset(OpAMD64VCMPPD128)
+               v.AuxInt = uint8ToAuxInt(13)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (Rsh16Ux64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRW x y)
+}
+func rewriteValueAMD64_OpGreaterEqualFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (GreaterEqualFloat64x4 x y)
+       // result: (VCMPPD256 [13] x y)
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRW)
+               v.reset(OpAMD64VCMPPD256)
+               v.AuxInt = uint8ToAuxInt(13)
                v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh16Ux8(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh16Ux8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPBconst y [16])))
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualFloat64x8 x y)
+       // result: (VPMOVMToVec64x8 (VCMPPD512 [13] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v2.AuxInt = int8ToAuxInt(16)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Rsh16Ux8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRW x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRW)
-               v.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh16x16(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh16x16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [16])))))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARW)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v3.AuxInt = int16ToAuxInt(16)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (Rsh16x16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARW x y)
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualInt16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPW512 [13] x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARW)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh16x32(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh16x32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [16])))))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARW)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v3.AuxInt = int32ToAuxInt(16)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (Rsh16x32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARW x y)
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualInt32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPD512 [13] x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARW)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh16x64(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh16x64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARW <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [16])))))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARW)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v3.AuxInt = int32ToAuxInt(16)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (Rsh16x64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARW x y)
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualInt64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPQ512 [13] x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARW)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh16x8(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh16x8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [16])))))
-       for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARW)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v3.AuxInt = int8ToAuxInt(16)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
-               return true
-       }
-       // match: (Rsh16x8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARW x y)
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualInt8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPB512 [13] x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARW)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh32Ux16(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh32Ux16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualUint16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPUW512 [13] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v2.AuxInt = int16ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
-               return true
-       }
-       // match: (Rsh32Ux16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRL)
-               v.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh32Ux32(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh32Ux32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualUint32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPUD512 [13] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh32Ux32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRL x y)
-       for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRL)
-               v.AddArg2(x, y)
+}
+func rewriteValueAMD64_OpGreaterEqualUint64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualUint64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh32Ux64(v *Value) bool {
+func rewriteValueAMD64_OpGreaterEqualUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh32Ux64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
+       typ := &b.Func.Config.Types
+       // match: (GreaterEqualUint8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPUB512 [13] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(13)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh32Ux64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRL x y)
+}
+func rewriteValueAMD64_OpGreaterFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (GreaterFloat32x16 x y)
+       // result: (VPMOVMToVec32x16 (VCMPPS512 [14] x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRL)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(14)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh32Ux8(v *Value) bool {
+func rewriteValueAMD64_OpGreaterFloat32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (Rsh32Ux8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
+       // match: (GreaterFloat32x4 x y)
+       // result: (VCMPPS128 [14] x y)
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v2.AuxInt = int8ToAuxInt(32)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.reset(OpAMD64VCMPPS128)
+               v.AuxInt = uint8ToAuxInt(14)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (Rsh32Ux8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRL x y)
+}
+func rewriteValueAMD64_OpGreaterFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (GreaterFloat32x8 x y)
+       // result: (VCMPPS256 [14] x y)
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRL)
+               v.reset(OpAMD64VCMPPS256)
+               v.AuxInt = uint8ToAuxInt(14)
                v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh32x16(v *Value) bool {
+func rewriteValueAMD64_OpGreaterFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (Rsh32x16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [32])))))
+       // match: (GreaterFloat64x2 x y)
+       // result: (VCMPPD128 [14] x y)
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARL)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v3.AuxInt = int16ToAuxInt(32)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VCMPPD128)
+               v.AuxInt = uint8ToAuxInt(14)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (Rsh32x16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARL x y)
+}
+func rewriteValueAMD64_OpGreaterFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (GreaterFloat64x4 x y)
+       // result: (VCMPPD256 [14] x y)
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARL)
+               v.reset(OpAMD64VCMPPD256)
+               v.AuxInt = uint8ToAuxInt(14)
                v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh32x32(v *Value) bool {
+func rewriteValueAMD64_OpGreaterFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh32x32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [32])))))
+       typ := &b.Func.Config.Types
+       // match: (GreaterFloat64x8 x y)
+       // result: (VPMOVMToVec64x8 (VCMPPD512 [14] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARL)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v3.AuxInt = int32ToAuxInt(32)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(14)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh32x32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARL x y)
+}
+func rewriteValueAMD64_OpGreaterInt16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (GreaterInt16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPGTW512 x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARL)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTW512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh32x64(v *Value) bool {
+func rewriteValueAMD64_OpGreaterInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh32x64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARL <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [32])))))
+       typ := &b.Func.Config.Types
+       // match: (GreaterInt32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPGTD512 x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARL)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v3.AuxInt = int32ToAuxInt(32)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTD512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh32x64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARL x y)
+}
+func rewriteValueAMD64_OpGreaterInt64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (GreaterInt64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPGTQ512 x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARL)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTQ512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh32x8(v *Value) bool {
+func rewriteValueAMD64_OpGreaterInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh32x8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [32])))))
+       typ := &b.Func.Config.Types
+       // match: (GreaterInt8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPGTB512 x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARL)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v3.AuxInt = int8ToAuxInt(32)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTB512, typ.Mask)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh32x8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARL x y)
+}
+func rewriteValueAMD64_OpGreaterUint16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (GreaterUint16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPUW512 [14] x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARL)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(14)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh64Ux16(v *Value) bool {
+func rewriteValueAMD64_OpGreaterUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh64Ux16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPWconst y [64])))
+       typ := &b.Func.Config.Types
+       // match: (GreaterUint32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPUD512 [14] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(14)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v2.AuxInt = int16ToAuxInt(64)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh64Ux16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRQ x y)
+}
+func rewriteValueAMD64_OpGreaterUint64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (GreaterUint64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [14] x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRQ)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(14)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh64Ux32(v *Value) bool {
+func rewriteValueAMD64_OpGreaterUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh64Ux32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPLconst y [64])))
+       typ := &b.Func.Config.Types
+       // match: (GreaterUint8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPUB512 [14] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(14)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(64)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh64Ux32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRQ x y)
+}
+func rewriteValueAMD64_OpHasCPUFeature(v *Value) bool {
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (HasCPUFeature {s})
+       // result: (SETNE (CMPLconst [0] (LoweredHasCPUFeature {s})))
        for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRQ)
-               v.AddArg2(x, y)
+               s := auxToSym(v.Aux)
+               v.reset(OpAMD64SETNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v0.AuxInt = int32ToAuxInt(0)
+               v1 := b.NewValue0(v.Pos, OpAMD64LoweredHasCPUFeature, typ.UInt64)
+               v1.Aux = symToAux(s)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh64Ux64(v *Value) bool {
+func rewriteValueAMD64_OpIsInBounds(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh64Ux64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPQconst y [64])))
+       // match: (IsInBounds idx len)
+       // result: (SETB (CMPQ idx len))
        for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(64)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               idx := v_0
+               len := v_1
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v0.AddArg2(idx, len)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh64Ux64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRQ x y)
+}
+func rewriteValueAMD64_OpIsNanFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (IsNanFloat32x16 x y)
+       // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRQ)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(3)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh64Ux8(v *Value) bool {
+func rewriteValueAMD64_OpIsNanFloat32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (Rsh64Ux8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPBconst y [64])))
+       // match: (IsNanFloat32x4 x y)
+       // result: (VCMPPS128 [3] x y)
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQ)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v2.AuxInt = int8ToAuxInt(64)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.reset(OpAMD64VCMPPS128)
+               v.AuxInt = uint8ToAuxInt(3)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (Rsh64Ux8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRQ x y)
+}
+func rewriteValueAMD64_OpIsNanFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (IsNanFloat32x8 x y)
+       // result: (VCMPPS256 [3] x y)
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRQ)
+               v.reset(OpAMD64VCMPPS256)
+               v.AuxInt = uint8ToAuxInt(3)
                v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh64x16(v *Value) bool {
+func rewriteValueAMD64_OpIsNanFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (Rsh64x16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [64])))))
+       // match: (IsNanFloat64x2 x y)
+       // result: (VCMPPD128 [3] x y)
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARQ)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v3.AuxInt = int16ToAuxInt(64)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VCMPPD128)
+               v.AuxInt = uint8ToAuxInt(3)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (Rsh64x16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARQ x y)
+}
+func rewriteValueAMD64_OpIsNanFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (IsNanFloat64x4 x y)
+       // result: (VCMPPD256 [3] x y)
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARQ)
+               v.reset(OpAMD64VCMPPD256)
+               v.AuxInt = uint8ToAuxInt(3)
                v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh64x32(v *Value) bool {
+func rewriteValueAMD64_OpIsNanFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh64x32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [64])))))
+       typ := &b.Func.Config.Types
+       // match: (IsNanFloat64x8 x y)
+       // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARQ)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v3.AuxInt = int32ToAuxInt(64)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(3)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh64x32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARQ x y)
+}
+func rewriteValueAMD64_OpIsNonNil(v *Value) bool {
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (IsNonNil p)
+       // result: (SETNE (TESTQ p p))
        for {
-               x := v_0
-               y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARQ)
-               v.AddArg2(x, y)
+               p := v_0
+               v.reset(OpAMD64SETNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64TESTQ, types.TypeFlags)
+               v0.AddArg2(p, p)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh64x64(v *Value) bool {
+func rewriteValueAMD64_OpIsSliceInBounds(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh64x64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARQ <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [64])))))
+       // match: (IsSliceInBounds idx len)
+       // result: (SETBE (CMPQ idx len))
        for {
-               t := v.Type
-               x := v_0
-               y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARQ)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v3.AuxInt = int32ToAuxInt(64)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               idx := v_0
+               len := v_1
+               v.reset(OpAMD64SETBE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v0.AddArg2(idx, len)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh64x64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARQ x y)
+}
+func rewriteValueAMD64_OpLeq16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Leq16 x y)
+       // result: (SETLE (CMPW x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARQ)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETLE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh64x8(v *Value) bool {
+func rewriteValueAMD64_OpLeq16U(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh64x8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [64])))))
+       // match: (Leq16U x y)
+       // result: (SETBE (CMPW x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARQ)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v3.AuxInt = int8ToAuxInt(64)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64SETBE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh64x8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARQ x y)
+}
+func rewriteValueAMD64_OpLeq32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Leq32 x y)
+       // result: (SETLE (CMPL x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARQ)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETLE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh8Ux16(v *Value) bool {
+func rewriteValueAMD64_OpLeq32F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh8Ux16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPWconst y [8])))
+       // match: (Leq32F x y)
+       // result: (SETGEF (UCOMISS y x))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t)
-               v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v2.AuxInt = int16ToAuxInt(8)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.reset(OpAMD64SETGEF)
+               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags)
+               v0.AddArg2(y, x)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRB x y)
+}
+func rewriteValueAMD64_OpLeq32U(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Leq32U x y)
+       // result: (SETBE (CMPL x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRB)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETBE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh8Ux32(v *Value) bool {
+func rewriteValueAMD64_OpLeq64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh8Ux32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPLconst y [8])))
+       // match: (Leq64 x y)
+       // result: (SETLE (CMPQ x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t)
+               v.reset(OpAMD64SETLE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(8)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRB x y)
+}
+func rewriteValueAMD64_OpLeq64F(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Leq64F x y)
+       // result: (SETGEF (UCOMISD y x))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRB)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETGEF)
+               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags)
+               v0.AddArg2(y, x)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh8Ux64(v *Value) bool {
+func rewriteValueAMD64_OpLeq64U(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh8Ux64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPQconst y [8])))
+       // match: (Leq64U x y)
+       // result: (SETBE (CMPQ x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t)
+               v.reset(OpAMD64SETBE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v2.AuxInt = int32ToAuxInt(8)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRB x y)
+}
+func rewriteValueAMD64_OpLeq8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Leq8 x y)
+       // result: (SETLE (CMPB x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRB)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETLE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh8Ux8(v *Value) bool {
+func rewriteValueAMD64_OpLeq8U(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh8Ux8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPBconst y [8])))
+       // match: (Leq8U x y)
+       // result: (SETBE (CMPB x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64ANDL)
-               v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t)
+               v.reset(OpAMD64SETBE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
                v0.AddArg2(x, y)
-               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
-               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v2.AuxInt = int8ToAuxInt(8)
-               v2.AddArg(y)
-               v1.AddArg(v2)
-               v.AddArg2(v0, v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh8Ux8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SHRB x y)
+}
+func rewriteValueAMD64_OpLess16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Less16 x y)
+       // result: (SETL (CMPW x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SHRB)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETL)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh8x16(v *Value) bool {
+func rewriteValueAMD64_OpLess16U(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh8x16 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [8])))))
+       // match: (Less16U x y)
+       // result: (SETB (CMPW x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARB)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
-               v3.AuxInt = int16ToAuxInt(8)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x16 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARB x y)
+}
+func rewriteValueAMD64_OpLess32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Less32 x y)
+       // result: (SETL (CMPL x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARB)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETL)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh8x32(v *Value) bool {
+func rewriteValueAMD64_OpLess32F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh8x32 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [8])))))
+       // match: (Less32F x y)
+       // result: (SETGF (UCOMISS y x))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARB)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
-               v3.AuxInt = int32ToAuxInt(8)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64SETGF)
+               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags)
+               v0.AddArg2(y, x)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x32 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARB x y)
+}
+func rewriteValueAMD64_OpLess32U(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Less32U x y)
+       // result: (SETB (CMPL x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARB)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh8x64(v *Value) bool {
+func rewriteValueAMD64_OpLess64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh8x64 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARB <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [8])))))
+       // match: (Less64 x y)
+       // result: (SETL (CMPQ x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARB)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
-               v3.AuxInt = int32ToAuxInt(8)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64SETL)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x64 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARB x y)
+}
+func rewriteValueAMD64_OpLess64F(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Less64F x y)
+       // result: (SETGF (UCOMISD y x))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARB)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETGF)
+               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags)
+               v0.AddArg2(y, x)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpRsh8x8(v *Value) bool {
+func rewriteValueAMD64_OpLess64U(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (Rsh8x8 <t> x y)
-       // cond: !shiftIsBounded(v)
-       // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [8])))))
+       // match: (Less64U x y)
+       // result: (SETB (CMPQ x y))
        for {
-               t := v.Type
                x := v_0
                y := v_1
-               if !(!shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARB)
-               v.Type = t
-               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
-               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
-               v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
-               v3.AuxInt = int8ToAuxInt(8)
-               v3.AddArg(y)
-               v2.AddArg(v3)
-               v1.AddArg(v2)
-               v0.AddArg2(y, v1)
-               v.AddArg2(x, v0)
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       // match: (Rsh8x8 x y)
-       // cond: shiftIsBounded(v)
-       // result: (SARB x y)
+}
+func rewriteValueAMD64_OpLess8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (Less8 x y)
+       // result: (SETL (CMPB x y))
        for {
                x := v_0
                y := v_1
-               if !(shiftIsBounded(v)) {
-                       break
-               }
-               v.reset(OpAMD64SARB)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64SETL)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpScaleMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLess8U(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ScaleMaskedFloat32x16 x y mask)
-       // result: (VSCALEFPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Less8U x y)
+       // result: (SETB (CMPB x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSCALEFPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETB)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpScaleMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLessEqualFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ScaleMaskedFloat32x4 x y mask)
-       // result: (VSCALEFPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (LessEqualFloat32x16 x y)
+       // result: (VPMOVMToVec32x16 (VCMPPS512 [2] x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSCALEFPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpScaleMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLessEqualFloat32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ScaleMaskedFloat32x8 x y mask)
-       // result: (VSCALEFPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (LessEqualFloat32x4 x y)
+       // result: (VCMPPS128 [2] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSCALEFPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPS128)
+               v.AuxInt = uint8ToAuxInt(2)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpScaleMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLessEqualFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ScaleMaskedFloat64x2 x y mask)
-       // result: (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (LessEqualFloat32x8 x y)
+       // result: (VCMPPS256 [2] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSCALEFPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPS256)
+               v.AuxInt = uint8ToAuxInt(2)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpScaleMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLessEqualFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ScaleMaskedFloat64x4 x y mask)
-       // result: (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (LessEqualFloat64x2 x y)
+       // result: (VCMPPD128 [2] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSCALEFPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPD128)
+               v.AuxInt = uint8ToAuxInt(2)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpScaleMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLessEqualFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ScaleMaskedFloat64x8 x y mask)
-       // result: (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (LessEqualFloat64x4 x y)
+       // result: (VCMPPD256 [2] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSCALEFPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPD256)
+               v.AuxInt = uint8ToAuxInt(2)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSelect0(v *Value) bool {
+func rewriteValueAMD64_OpLessEqualFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Select0 (Mul64uover x y))
-       // result: (Select0 <typ.UInt64> (MULQU x y))
+       // match: (LessEqualFloat64x8 x y)
+       // result: (VPMOVMToVec64x8 (VCMPPD512 [2] x y))
        for {
-               if v_0.Op != OpMul64uover {
-                       break
-               }
-               y := v_0.Args[1]
-               x := v_0.Args[0]
-               v.reset(OpSelect0)
-               v.Type = typ.UInt64
-               v0 := b.NewValue0(v.Pos, OpAMD64MULQU, types.NewTuple(typ.UInt64, types.TypeFlags))
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
                v0.AddArg2(x, y)
                v.AddArg(v0)
                return true
        }
-       // match: (Select0 (Mul32uover x y))
-       // result: (Select0 <typ.UInt32> (MULLU x y))
+}
+func rewriteValueAMD64_OpLessEqualInt16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessEqualInt16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPW512 [2] x y))
        for {
-               if v_0.Op != OpMul32uover {
-                       break
-               }
-               y := v_0.Args[1]
-               x := v_0.Args[0]
-               v.reset(OpSelect0)
-               v.Type = typ.UInt32
-               v0 := b.NewValue0(v.Pos, OpAMD64MULLU, types.NewTuple(typ.UInt32, types.TypeFlags))
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
                v0.AddArg2(x, y)
                v.AddArg(v0)
                return true
        }
-       // match: (Select0 (Add64carry x y c))
-       // result: (Select0 <typ.UInt64> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
-       for {
-               if v_0.Op != OpAdd64carry {
-                       break
-               }
-               c := v_0.Args[2]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpSelect0)
-               v.Type = typ.UInt64
-               v0 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
-               v2.AddArg(c)
-               v1.AddArg(v2)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (Select0 (Sub64borrow x y c))
-       // result: (Select0 <typ.UInt64> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
-       for {
-               if v_0.Op != OpSub64borrow {
-                       break
-               }
-               c := v_0.Args[2]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpSelect0)
-               v.Type = typ.UInt64
-               v0 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
-               v2.AddArg(c)
-               v1.AddArg(v2)
-               v0.AddArg3(x, y, v1)
-               v.AddArg(v0)
-               return true
-       }
-       // match: (Select0 <t> (AddTupleFirst32 val tuple))
-       // result: (ADDL val (Select0 <t> tuple))
-       for {
-               t := v.Type
-               if v_0.Op != OpAMD64AddTupleFirst32 {
-                       break
-               }
-               tuple := v_0.Args[1]
-               val := v_0.Args[0]
-               v.reset(OpAMD64ADDL)
-               v0 := b.NewValue0(v.Pos, OpSelect0, t)
-               v0.AddArg(tuple)
-               v.AddArg2(val, v0)
-               return true
-       }
-       // match: (Select0 <t> (AddTupleFirst64 val tuple))
-       // result: (ADDQ val (Select0 <t> tuple))
-       for {
-               t := v.Type
-               if v_0.Op != OpAMD64AddTupleFirst64 {
-                       break
-               }
-               tuple := v_0.Args[1]
-               val := v_0.Args[0]
-               v.reset(OpAMD64ADDQ)
-               v0 := b.NewValue0(v.Pos, OpSelect0, t)
-               v0.AddArg(tuple)
-               v.AddArg2(val, v0)
-               return true
-       }
-       // match: (Select0 a:(ADDQconstflags [c] x))
-       // cond: a.Uses == 1
-       // result: (ADDQconst [c] x)
-       for {
-               a := v_0
-               if a.Op != OpAMD64ADDQconstflags {
-                       break
-               }
-               c := auxIntToInt32(a.AuxInt)
-               x := a.Args[0]
-               if !(a.Uses == 1) {
-                       break
-               }
-               v.reset(OpAMD64ADDQconst)
-               v.AuxInt = int32ToAuxInt(c)
-               v.AddArg(x)
-               return true
-       }
-       // match: (Select0 a:(ADDLconstflags [c] x))
-       // cond: a.Uses == 1
-       // result: (ADDLconst [c] x)
-       for {
-               a := v_0
-               if a.Op != OpAMD64ADDLconstflags {
-                       break
-               }
-               c := auxIntToInt32(a.AuxInt)
-               x := a.Args[0]
-               if !(a.Uses == 1) {
-                       break
-               }
-               v.reset(OpAMD64ADDLconst)
-               v.AuxInt = int32ToAuxInt(c)
-               v.AddArg(x)
-               return true
-       }
-       return false
 }
-func rewriteValueAMD64_OpSelect1(v *Value) bool {
+func rewriteValueAMD64_OpLessEqualInt32x16(v *Value) bool {
+       v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Select1 (Mul64uover x y))
-       // result: (SETO (Select1 <types.TypeFlags> (MULQU x y)))
+       // match: (LessEqualInt32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPD512 [2] x y))
        for {
-               if v_0.Op != OpMul64uover {
-                       break
-               }
-               y := v_0.Args[1]
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETO)
-               v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v1 := b.NewValue0(v.Pos, OpAMD64MULQU, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v1.AddArg2(x, y)
-               v0.AddArg(v1)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
+               v0.AddArg2(x, y)
                v.AddArg(v0)
                return true
        }
-       // match: (Select1 (Mul32uover x y))
-       // result: (SETO (Select1 <types.TypeFlags> (MULLU x y)))
+}
+func rewriteValueAMD64_OpLessEqualInt64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessEqualInt64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPQ512 [2] x y))
        for {
-               if v_0.Op != OpMul32uover {
-                       break
-               }
-               y := v_0.Args[1]
-               x := v_0.Args[0]
-               v.reset(OpAMD64SETO)
-               v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v1 := b.NewValue0(v.Pos, OpAMD64MULLU, types.NewTuple(typ.UInt32, types.TypeFlags))
-               v1.AddArg2(x, y)
-               v0.AddArg(v1)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
+               v0.AddArg2(x, y)
                v.AddArg(v0)
                return true
        }
-       // match: (Select1 (Add64carry x y c))
-       // result: (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
+}
+func rewriteValueAMD64_OpLessEqualInt8x64(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessEqualInt8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPB512 [2] x y))
        for {
-               if v_0.Op != OpAdd64carry {
-                       break
-               }
-               c := v_0.Args[2]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpAMD64NEGQ)
-               v.Type = typ.UInt64
-               v0 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, typ.UInt64)
-               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v2 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v4 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
-               v4.AddArg(c)
-               v3.AddArg(v4)
-               v2.AddArg3(x, y, v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
+               v0.AddArg2(x, y)
                v.AddArg(v0)
                return true
        }
-       // match: (Select1 (Sub64borrow x y c))
-       // result: (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
+}
+func rewriteValueAMD64_OpLessEqualUint16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessEqualUint16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPUW512 [2] x y))
        for {
-               if v_0.Op != OpSub64borrow {
-                       break
-               }
-               c := v_0.Args[2]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               v.reset(OpAMD64NEGQ)
-               v.Type = typ.UInt64
-               v0 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, typ.UInt64)
-               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v2 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags))
-               v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v4 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
-               v4.AddArg(c)
-               v3.AddArg(v4)
-               v2.AddArg3(x, y, v3)
-               v1.AddArg(v2)
-               v0.AddArg(v1)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
+               v0.AddArg2(x, y)
                v.AddArg(v0)
                return true
        }
-       // match: (Select1 (NEGLflags (MOVQconst [0])))
-       // result: (FlagEQ)
-       for {
-               if v_0.Op != OpAMD64NEGLflags {
-                       break
-               }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 0 {
-                       break
-               }
-               v.reset(OpAMD64FlagEQ)
-               return true
-       }
-       // match: (Select1 (NEGLflags (NEGQ (SBBQcarrymask x))))
-       // result: x
-       for {
-               if v_0.Op != OpAMD64NEGLflags {
-                       break
-               }
-               v_0_0 := v_0.Args[0]
-               if v_0_0.Op != OpAMD64NEGQ {
-                       break
-               }
-               v_0_0_0 := v_0_0.Args[0]
-               if v_0_0_0.Op != OpAMD64SBBQcarrymask {
-                       break
-               }
-               x := v_0_0_0.Args[0]
-               v.copyOf(x)
-               return true
-       }
-       // match: (Select1 (AddTupleFirst32 _ tuple))
-       // result: (Select1 tuple)
-       for {
-               if v_0.Op != OpAMD64AddTupleFirst32 {
-                       break
-               }
-               tuple := v_0.Args[1]
-               v.reset(OpSelect1)
-               v.AddArg(tuple)
-               return true
-       }
-       // match: (Select1 (AddTupleFirst64 _ tuple))
-       // result: (Select1 tuple)
-       for {
-               if v_0.Op != OpAMD64AddTupleFirst64 {
-                       break
-               }
-               tuple := v_0.Args[1]
-               v.reset(OpSelect1)
-               v.AddArg(tuple)
-               return true
-       }
-       // match: (Select1 a:(LoweredAtomicAnd64 ptr val mem))
-       // cond: a.Uses == 1 && clobber(a)
-       // result: (ANDQlock ptr val mem)
-       for {
-               a := v_0
-               if a.Op != OpAMD64LoweredAtomicAnd64 {
-                       break
-               }
-               mem := a.Args[2]
-               ptr := a.Args[0]
-               val := a.Args[1]
-               if !(a.Uses == 1 && clobber(a)) {
-                       break
-               }
-               v.reset(OpAMD64ANDQlock)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (Select1 a:(LoweredAtomicAnd32 ptr val mem))
-       // cond: a.Uses == 1 && clobber(a)
-       // result: (ANDLlock ptr val mem)
-       for {
-               a := v_0
-               if a.Op != OpAMD64LoweredAtomicAnd32 {
-                       break
-               }
-               mem := a.Args[2]
-               ptr := a.Args[0]
-               val := a.Args[1]
-               if !(a.Uses == 1 && clobber(a)) {
-                       break
-               }
-               v.reset(OpAMD64ANDLlock)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (Select1 a:(LoweredAtomicOr64 ptr val mem))
-       // cond: a.Uses == 1 && clobber(a)
-       // result: (ORQlock ptr val mem)
-       for {
-               a := v_0
-               if a.Op != OpAMD64LoweredAtomicOr64 {
-                       break
-               }
-               mem := a.Args[2]
-               ptr := a.Args[0]
-               val := a.Args[1]
-               if !(a.Uses == 1 && clobber(a)) {
-                       break
-               }
-               v.reset(OpAMD64ORQlock)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       // match: (Select1 a:(LoweredAtomicOr32 ptr val mem))
-       // cond: a.Uses == 1 && clobber(a)
-       // result: (ORLlock ptr val mem)
-       for {
-               a := v_0
-               if a.Op != OpAMD64LoweredAtomicOr32 {
-                       break
-               }
-               mem := a.Args[2]
-               ptr := a.Args[0]
-               val := a.Args[1]
-               if !(a.Uses == 1 && clobber(a)) {
-                       break
-               }
-               v.reset(OpAMD64ORLlock)
-               v.AddArg3(ptr, val, mem)
-               return true
-       }
-       return false
 }
-func rewriteValueAMD64_OpSelectN(v *Value) bool {
+func rewriteValueAMD64_OpLessEqualUint32x16(v *Value) bool {
+       v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       config := b.Func.Config
-       // match: (SelectN [0] call:(CALLstatic {sym} s1:(MOVQstoreconst _ [sc] s2:(MOVQstore _ src s3:(MOVQstore _ dst mem)))))
-       // cond: sc.Val64() >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sc.Val64(), config) && clobber(s1, s2, s3, call)
-       // result: (Move [sc.Val64()] dst src mem)
-       for {
-               if auxIntToInt64(v.AuxInt) != 0 {
-                       break
-               }
-               call := v_0
-               if call.Op != OpAMD64CALLstatic || len(call.Args) != 1 {
-                       break
-               }
-               sym := auxToCall(call.Aux)
-               s1 := call.Args[0]
-               if s1.Op != OpAMD64MOVQstoreconst {
-                       break
-               }
-               sc := auxIntToValAndOff(s1.AuxInt)
-               _ = s1.Args[1]
-               s2 := s1.Args[1]
-               if s2.Op != OpAMD64MOVQstore {
-                       break
-               }
-               _ = s2.Args[2]
-               src := s2.Args[1]
-               s3 := s2.Args[2]
-               if s3.Op != OpAMD64MOVQstore {
-                       break
-               }
-               mem := s3.Args[2]
-               dst := s3.Args[1]
-               if !(sc.Val64() >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sc.Val64(), config) && clobber(s1, s2, s3, call)) {
-                       break
-               }
-               v.reset(OpMove)
-               v.AuxInt = int64ToAuxInt(sc.Val64())
-               v.AddArg3(dst, src, mem)
-               return true
-       }
-       // match: (SelectN [0] call:(CALLstatic {sym} dst src (MOVQconst [sz]) mem))
-       // cond: sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call)
-       // result: (Move [sz] dst src mem)
+       typ := &b.Func.Config.Types
+       // match: (LessEqualUint32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPUD512 [2] x y))
        for {
-               if auxIntToInt64(v.AuxInt) != 0 {
-                       break
-               }
-               call := v_0
-               if call.Op != OpAMD64CALLstatic || len(call.Args) != 4 {
-                       break
-               }
-               sym := auxToCall(call.Aux)
-               mem := call.Args[3]
-               dst := call.Args[0]
-               src := call.Args[1]
-               call_2 := call.Args[2]
-               if call_2.Op != OpAMD64MOVQconst {
-                       break
-               }
-               sz := auxIntToInt64(call_2.AuxInt)
-               if !(sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call)) {
-                       break
-               }
-               v.reset(OpMove)
-               v.AuxInt = int64ToAuxInt(sz)
-               v.AddArg3(dst, src, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpSetHiFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpLessEqualUint64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiFloat32x16 x y)
-       // result: (VINSERTF64X4512 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessEqualUint64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTF64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpLessEqualUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiFloat32x8 x y)
-       // result: (VINSERTF128256 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessEqualUint8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPUB512 [2] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTF128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(2)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpLessFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiFloat64x4 x y)
-       // result: (VINSERTF128256 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessFloat32x16 x y)
+       // result: (VPMOVMToVec32x16 (VCMPPS512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTF128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiFloat64x8(v *Value) bool {
+func rewriteValueAMD64_OpLessFloat32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiFloat64x8 x y)
-       // result: (VINSERTF64X4512 [1] x y)
+       // match: (LessFloat32x4 x y)
+       // result: (VCMPPS128 [1] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTF64X4512)
+               v.reset(OpAMD64VCMPPS128)
                v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiInt16x16(v *Value) bool {
+func rewriteValueAMD64_OpLessFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiInt16x16 x y)
-       // result: (VINSERTI128256 [1] x y)
+       // match: (LessFloat32x8 x y)
+       // result: (VCMPPS256 [1] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
+               v.reset(OpAMD64VCMPPS256)
                v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiInt16x32(v *Value) bool {
+func rewriteValueAMD64_OpLessFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiInt16x32 x y)
-       // result: (VINSERTI64X4512 [1] x y)
+       // match: (LessFloat64x2 x y)
+       // result: (VCMPPD128 [1] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
+               v.reset(OpAMD64VCMPPD128)
                v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpLessFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiInt32x16 x y)
-       // result: (VINSERTI64X4512 [1] x y)
+       // match: (LessFloat64x4 x y)
+       // result: (VCMPPD256 [1] x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
+               v.reset(OpAMD64VCMPPD256)
                v.AuxInt = uint8ToAuxInt(1)
                v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpLessFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiInt32x8 x y)
-       // result: (VINSERTI128256 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessFloat64x8 x y)
+       // result: (VPMOVMToVec64x8 (VCMPPD512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpLessInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiInt64x4 x y)
-       // result: (VINSERTI128256 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessInt16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPW512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpLessInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiInt64x8 x y)
-       // result: (VINSERTI64X4512 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessInt32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPD512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiInt8x32(v *Value) bool {
+func rewriteValueAMD64_OpLessInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiInt8x32 x y)
-       // result: (VINSERTI128256 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessInt64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPQ512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiInt8x64(v *Value) bool {
+func rewriteValueAMD64_OpLessInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiInt8x64 x y)
-       // result: (VINSERTI64X4512 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessInt8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPB512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiUint16x16(v *Value) bool {
+func rewriteValueAMD64_OpLessUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiUint16x16 x y)
-       // result: (VINSERTI128256 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessUint16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPUW512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiUint16x32(v *Value) bool {
+func rewriteValueAMD64_OpLessUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiUint16x32 x y)
-       // result: (VINSERTI64X4512 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessUint32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPUD512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiUint32x16(v *Value) bool {
+func rewriteValueAMD64_OpLessUint64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiUint32x16 x y)
-       // result: (VINSERTI64X4512 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessUint64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiUint32x8(v *Value) bool {
+func rewriteValueAMD64_OpLessUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiUint32x8 x y)
-       // result: (VINSERTI128256 [1] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LessUint8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPUB512 [1] x y))
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(1)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpLoad(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (Load <t> ptr mem)
+       // cond: (is64BitInt(t) || isPtr(t))
+       // result: (MOVQload ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(is64BitInt(t) || isPtr(t)) {
+                       break
+               }
+               v.reset(OpAMD64MOVQload)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (Load <t> ptr mem)
+       // cond: is32BitInt(t)
+       // result: (MOVLload ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(is32BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64MOVLload)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (Load <t> ptr mem)
+       // cond: is16BitInt(t)
+       // result: (MOVWload ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(is16BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64MOVWload)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (Load <t> ptr mem)
+       // cond: (t.IsBoolean() || is8BitInt(t))
+       // result: (MOVBload ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(t.IsBoolean() || is8BitInt(t)) {
+                       break
+               }
+               v.reset(OpAMD64MOVBload)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (Load <t> ptr mem)
+       // cond: is32BitFloat(t)
+       // result: (MOVSSload ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(is32BitFloat(t)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSload)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (Load <t> ptr mem)
+       // cond: is64BitFloat(t)
+       // result: (MOVSDload ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(is64BitFloat(t)) {
+                       break
+               }
+               v.reset(OpAMD64MOVSDload)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (Load <t> ptr mem)
+       // cond: t.Size() == 16
+       // result: (VMOVDQUload128 ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(t.Size() == 16) {
+                       break
+               }
+               v.reset(OpAMD64VMOVDQUload128)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (Load <t> ptr mem)
+       // cond: t.Size() == 32
+       // result: (VMOVDQUload256 ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(t.Size() == 32) {
+                       break
+               }
+               v.reset(OpAMD64VMOVDQUload256)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       // match: (Load <t> ptr mem)
+       // cond: t.Size() == 64
+       // result: (VMOVDQUload512 ptr mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VMOVDQUload512)
+               v.AddArg2(ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpLoadMask16x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (LoadMask16x16 <t> ptr mem)
+       // result: (VPMOVMToVec16x16 <types.TypeVec256> (KMOVQload <t> ptr mem))
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec16x16)
+               v.Type = types.TypeVec256
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpLoadMask16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (LoadMask16x32 <t> ptr mem)
+       // result: (VPMOVMToVec16x32 <types.TypeVec512> (KMOVQload <t> ptr mem))
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v.Type = types.TypeVec512
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpLoadMask16x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (LoadMask16x8 <t> ptr mem)
+       // result: (VPMOVMToVec16x8 <types.TypeVec128> (KMOVQload <t> ptr mem))
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec16x8)
+               v.Type = types.TypeVec128
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpLoadMask32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (LoadMask32x16 <t> ptr mem)
+       // result: (VPMOVMToVec32x16 <types.TypeVec512> (KMOVQload <t> ptr mem))
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v.Type = types.TypeVec512
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpLoadMask32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (LoadMask32x4 <t> ptr mem)
+       // result: (VPMOVMToVec32x4 <types.TypeVec128> (KMOVQload <t> ptr mem))
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec32x4)
+               v.Type = types.TypeVec128
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpLoadMask32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (LoadMask32x8 <t> ptr mem)
+       // result: (VPMOVMToVec32x8 <types.TypeVec256> (KMOVQload <t> ptr mem))
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec32x8)
+               v.Type = types.TypeVec256
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
+               return true
+       }
+}
+func rewriteValueAMD64_OpLoadMask64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       // match: (LoadMask64x2 <t> ptr mem)
+       // result: (VPMOVMToVec64x2 <types.TypeVec128> (KMOVQload <t> ptr mem))
+       for {
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec64x2)
+               v.Type = types.TypeVec128
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiUint64x4(v *Value) bool {
+func rewriteValueAMD64_OpLoadMask64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiUint64x4 x y)
-       // result: (VINSERTI128256 [1] x y)
+       b := v.Block
+       // match: (LoadMask64x4 <t> ptr mem)
+       // result: (VPMOVMToVec64x4 <types.TypeVec256> (KMOVQload <t> ptr mem))
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec64x4)
+               v.Type = types.TypeVec256
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiUint64x8(v *Value) bool {
+func rewriteValueAMD64_OpLoadMask64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiUint64x8 x y)
-       // result: (VINSERTI64X4512 [1] x y)
+       b := v.Block
+       // match: (LoadMask64x8 <t> ptr mem)
+       // result: (VPMOVMToVec64x8 <types.TypeVec512> (KMOVQload <t> ptr mem))
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v.Type = types.TypeVec512
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiUint8x32(v *Value) bool {
+func rewriteValueAMD64_OpLoadMask8x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiUint8x32 x y)
-       // result: (VINSERTI128256 [1] x y)
+       b := v.Block
+       // match: (LoadMask8x16 <t> ptr mem)
+       // result: (VPMOVMToVec8x16 <types.TypeVec128> (KMOVQload <t> ptr mem))
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec8x16)
+               v.Type = types.TypeVec128
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetHiUint8x64(v *Value) bool {
+func rewriteValueAMD64_OpLoadMask8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetHiUint8x64 x y)
-       // result: (VINSERTI64X4512 [1] x y)
+       b := v.Block
+       // match: (LoadMask8x32 <t> ptr mem)
+       // result: (VPMOVMToVec8x32 <types.TypeVec256> (KMOVQload <t> ptr mem))
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(1)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec8x32)
+               v.Type = types.TypeVec256
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetLoFloat32x16(v *Value) bool {
+func rewriteValueAMD64_OpLoadMask8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoFloat32x16 x y)
-       // result: (VINSERTF64X4512 [0] x y)
+       b := v.Block
+       // match: (LoadMask8x64 <t> ptr mem)
+       // result: (VPMOVMToVec8x64 <types.TypeVec512> (KMOVQload <t> ptr mem))
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTF64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mem := v_1
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v.Type = types.TypeVec512
+               v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
+               v0.AddArg2(ptr, mem)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSetLoFloat32x8(v *Value) bool {
+func rewriteValueAMD64_OpLoadMasked16(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoFloat32x8 x y)
-       // result: (VINSERTF128256 [0] x y)
+       b := v.Block
+       // match: (LoadMasked16 <t> ptr mask mem)
+       // cond: t.Size() == 64
+       // result: (VPMASK16load512 ptr (VPMOVVec16x32ToM <types.TypeMask> mask) mem)
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTF128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mask := v_1
+               mem := v_2
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK16load512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoFloat64x4(v *Value) bool {
+func rewriteValueAMD64_OpLoadMasked32(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoFloat64x4 x y)
-       // result: (VINSERTF128256 [0] x y)
+       b := v.Block
+       // match: (LoadMasked32 <t> ptr mask mem)
+       // cond: t.Size() == 16
+       // result: (VPMASK32load128 ptr mask mem)
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTF128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mask := v_1
+               mem := v_2
+               if !(t.Size() == 16) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK32load128)
+               v.AddArg3(ptr, mask, mem)
                return true
        }
-}
-func rewriteValueAMD64_OpSetLoFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoFloat64x8 x y)
-       // result: (VINSERTF64X4512 [0] x y)
+       // match: (LoadMasked32 <t> ptr mask mem)
+       // cond: t.Size() == 32
+       // result: (VPMASK32load256 ptr mask mem)
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTF64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mask := v_1
+               mem := v_2
+               if !(t.Size() == 32) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK32load256)
+               v.AddArg3(ptr, mask, mem)
                return true
        }
-}
-func rewriteValueAMD64_OpSetLoInt16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoInt16x16 x y)
-       // result: (VINSERTI128256 [0] x y)
+       // match: (LoadMasked32 <t> ptr mask mem)
+       // cond: t.Size() == 64
+       // result: (VPMASK32load512 ptr (VPMOVVec32x16ToM <types.TypeMask> mask) mem)
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mask := v_1
+               mem := v_2
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK32load512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoInt16x32(v *Value) bool {
+func rewriteValueAMD64_OpLoadMasked64(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoInt16x32 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       b := v.Block
+       // match: (LoadMasked64 <t> ptr mask mem)
+       // cond: t.Size() == 16
+       // result: (VPMASK64load128 ptr mask mem)
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mask := v_1
+               mem := v_2
+               if !(t.Size() == 16) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK64load128)
+               v.AddArg3(ptr, mask, mem)
                return true
        }
-}
-func rewriteValueAMD64_OpSetLoInt32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoInt32x16 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       // match: (LoadMasked64 <t> ptr mask mem)
+       // cond: t.Size() == 32
+       // result: (VPMASK64load256 ptr mask mem)
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mask := v_1
+               mem := v_2
+               if !(t.Size() == 32) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK64load256)
+               v.AddArg3(ptr, mask, mem)
+               return true
+       }
+       // match: (LoadMasked64 <t> ptr mask mem)
+       // cond: t.Size() == 64
+       // result: (VPMASK64load512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) mem)
+       for {
+               t := v.Type
+               ptr := v_0
+               mask := v_1
+               mem := v_2
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK64load512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpLoadMasked8(v *Value) bool {
+       v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoInt32x8 x y)
-       // result: (VINSERTI128256 [0] x y)
+       b := v.Block
+       // match: (LoadMasked8 <t> ptr mask mem)
+       // cond: t.Size() == 64
+       // result: (VPMASK8load512 ptr (VPMOVVec8x64ToM <types.TypeMask> mask) mem)
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               ptr := v_0
+               mask := v_1
+               mem := v_2
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK8load512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
+               v0.AddArg(mask)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpLocalAddr(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoInt64x4 x y)
-       // result: (VINSERTI128256 [0] x y)
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (LocalAddr <t> {sym} base mem)
+       // cond: t.Elem().HasPointers()
+       // result: (LEAQ {sym} (SPanchored base mem))
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               t := v.Type
+               sym := auxToSym(v.Aux)
+               base := v_0
+               mem := v_1
+               if !(t.Elem().HasPointers()) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ)
+               v.Aux = symToAux(sym)
+               v0 := b.NewValue0(v.Pos, OpSPanchored, typ.Uintptr)
+               v0.AddArg2(base, mem)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (LocalAddr <t> {sym} base _)
+       // cond: !t.Elem().HasPointers()
+       // result: (LEAQ {sym} base)
+       for {
+               t := v.Type
+               sym := auxToSym(v.Aux)
+               base := v_0
+               if !(!t.Elem().HasPointers()) {
+                       break
+               }
+               v.reset(OpAMD64LEAQ)
+               v.Aux = symToAux(sym)
+               v.AddArg(base)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpLsh16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoInt64x8 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       b := v.Block
+       // match: (Lsh16x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v2.AuxInt = int16ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpSetLoInt8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoInt8x32 x y)
-       // result: (VINSERTI128256 [0] x y)
+       // match: (Lsh16x16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
                v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoInt8x64(v *Value) bool {
+func rewriteValueAMD64_OpLsh16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoInt8x64 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       b := v.Block
+       // match: (Lsh16x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpSetLoUint16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoUint16x16 x y)
-       // result: (VINSERTI128256 [0] x y)
+       // match: (Lsh16x32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
                v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoUint16x32(v *Value) bool {
+func rewriteValueAMD64_OpLsh16x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoUint16x32 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       b := v.Block
+       // match: (Lsh16x64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpSetLoUint32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoUint32x16 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       // match: (Lsh16x64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
                v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoUint32x8(v *Value) bool {
+func rewriteValueAMD64_OpLsh16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoUint32x8 x y)
-       // result: (VINSERTI128256 [0] x y)
+       b := v.Block
+       // match: (Lsh16x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v2.AuxInt = int8ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpSetLoUint64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoUint64x4 x y)
-       // result: (VINSERTI128256 [0] x y)
+       // match: (Lsh16x8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
                v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoUint64x8(v *Value) bool {
+func rewriteValueAMD64_OpLsh32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoUint64x8 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       b := v.Block
+       // match: (Lsh32x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v2.AuxInt = int16ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
+               return true
+       }
+       // match: (Lsh32x16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
                v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSetLoUint8x32(v *Value) bool {
+func rewriteValueAMD64_OpLsh32x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       // match: (SetLoUint8x32 x y)
-       // result: (VINSERTI128256 [0] x y)
+       b := v.Block
+       // match: (Lsh32x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI128256)
-               v.AuxInt = uint8ToAuxInt(0)
-               v.AddArg2(x, y)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpSetLoUint8x64(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (SetLoUint8x64 x y)
-       // result: (VINSERTI64X4512 [0] x y)
+       // match: (Lsh32x32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
                x := v_0
                y := v_1
-               v.reset(OpAMD64VINSERTI64X4512)
-               v.AuxInt = uint8ToAuxInt(0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
                v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh32x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt16x16 [a] x y mask)
-       // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Lsh32x64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt16x32 [a] x y mask)
-       // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Lsh32x64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt16x8 [a] x y mask)
-       // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Lsh32x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v2.AuxInt = int8ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt32x16 [a] x y mask)
-       // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Lsh32x8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh64x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt32x4 [a] x y mask)
-       // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Lsh64x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPWconst y [64])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v2.AuxInt = int16ToAuxInt(64)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt32x8 [a] x y mask)
-       // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Lsh64x16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLQ x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh64x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt64x2 [a] x y mask)
-       // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Lsh64x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPLconst y [64])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(64)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt64x4 [a] x y mask)
-       // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Lsh64x32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLQ x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh64x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedInt64x8 [a] x y mask)
-       // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Lsh64x64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPQconst y [64])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(64)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint16x16 [a] x y mask)
-       // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Lsh64x64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLQ x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint16x32 [a] x y mask)
-       // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Lsh64x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMPBconst y [64])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLQ, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v2.AuxInt = int8ToAuxInt(64)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint16x8 [a] x y mask)
-       // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Lsh64x8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLQ x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDWMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh8x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint32x16 [a] x y mask)
-       // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Lsh8x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v2.AuxInt = int16ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint32x4 [a] x y mask)
-       // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Lsh8x16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint32x8 [a] x y mask)
-       // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Lsh8x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint64x2 [a] x y mask)
-       // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Lsh8x32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint64x4 [a] x y mask)
-       // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Lsh8x64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
        for {
-               a := auxIntToUint8(v.AuxInt)
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftConcatMaskedUint64x8 [a] x y mask)
-       // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Lsh8x64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHLDQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpLsh8x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt16x16 x y mask)
-       // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Lsh8x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHLL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v2.AuxInt = int8ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllLeftMaskedInt16x32 x y mask)
-       // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Lsh8x8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHLL x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHLL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMax32F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt16x8 x y mask)
-       // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Max32F <t> x y)
+       // result: (Neg32F <t> (Min32F <t> (Neg32F <t> x) (Neg32F <t> y)))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpNeg32F)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpMin32F, t)
+               v1 := b.NewValue0(v.Pos, OpNeg32F, t)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpNeg32F, t)
+               v2.AddArg(y)
+               v0.AddArg2(v1, v2)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMax64F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt32x16 x y mask)
-       // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Max64F <t> x y)
+       // result: (Neg64F <t> (Min64F <t> (Neg64F <t> x) (Neg64F <t> y)))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpNeg64F)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpMin64F, t)
+               v1 := b.NewValue0(v.Pos, OpNeg64F, t)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpNeg64F, t)
+               v2.AddArg(y)
+               v0.AddArg2(v1, v2)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMin32F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt32x4 x y mask)
-       // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Min32F <t> x y)
+       // result: (POR (MINSS <t> (MINSS <t> x y) x) (MINSS <t> x y))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64POR)
+               v0 := b.NewValue0(v.Pos, OpAMD64MINSS, t)
+               v1 := b.NewValue0(v.Pos, OpAMD64MINSS, t)
+               v1.AddArg2(x, y)
+               v0.AddArg2(v1, x)
+               v.AddArg2(v0, v1)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMin64F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt32x8 x y mask)
-       // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Min64F <t> x y)
+       // result: (POR (MINSD <t> (MINSD <t> x y) x) (MINSD <t> x y))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64POR)
+               v0 := b.NewValue0(v.Pos, OpAMD64MINSD, t)
+               v1 := b.NewValue0(v.Pos, OpAMD64MINSD, t)
+               v1.AddArg2(x, y)
+               v0.AddArg2(v1, x)
+               v.AddArg2(v0, v1)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMod16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt64x2 x y mask)
-       // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Mod16 [a] x y)
+       // result: (Select1 (DIVW [a] x y))
        for {
+               a := auxIntToBool(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16))
+               v0.AuxInt = boolToAuxInt(a)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMod16u(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt64x4 x y mask)
-       // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Mod16u x y)
+       // result: (Select1 (DIVWU x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMod32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedInt64x8 x y mask)
-       // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Mod32 [a] x y)
+       // result: (Select1 (DIVL [a] x y))
        for {
+               a := auxIntToBool(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVL, types.NewTuple(typ.Int32, typ.Int32))
+               v0.AuxInt = boolToAuxInt(a)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMod32u(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint16x16 x y mask)
-       // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Mod32u x y)
+       // result: (Select1 (DIVLU x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVLU, types.NewTuple(typ.UInt32, typ.UInt32))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMod64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint16x32 x y mask)
-       // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Mod64 [a] x y)
+       // result: (Select1 (DIVQ [a] x y))
        for {
+               a := auxIntToBool(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVQ, types.NewTuple(typ.Int64, typ.Int64))
+               v0.AuxInt = boolToAuxInt(a)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMod64u(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint16x8 x y mask)
-       // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Mod64u x y)
+       // result: (Select1 (DIVQU x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVQU, types.NewTuple(typ.UInt64, typ.UInt64))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMod8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint32x16 x y mask)
-       // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Mod8 x y)
+       // result: (Select1 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVW, types.NewTuple(typ.Int16, typ.Int16))
+               v1 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpSignExt8to16, typ.Int16)
+               v2.AddArg(y)
+               v0.AddArg2(v1, v2)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpMod8u(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint32x4 x y mask)
-       // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Mod8u x y)
+       // result: (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpSelect1)
+               v0 := b.NewValue0(v.Pos, OpAMD64DIVWU, types.NewTuple(typ.UInt16, typ.UInt16))
+               v1 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16)
+               v1.AddArg(x)
+               v2 := b.NewValue0(v.Pos, OpZeroExt8to16, typ.UInt16)
+               v2.AddArg(y)
+               v0.AddArg2(v1, v2)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v *Value) bool {
+func rewriteValueAMD64_OpMove(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint32x8 x y mask)
-       // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Move [0] _ _ mem)
+       // result: mem
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if auxIntToInt64(v.AuxInt) != 0 {
+                       break
+               }
+               mem := v_2
+               v.copyOf(mem)
+               return true
+       }
+       // match: (Move [1] dst src mem)
+       // result: (MOVBstore dst (MOVBload src mem) mem)
+       for {
+               if auxIntToInt64(v.AuxInt) != 1 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVBstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
+               v0.AddArg2(src, mem)
+               v.AddArg3(dst, v0, mem)
+               return true
+       }
+       // match: (Move [2] dst src mem)
+       // result: (MOVWstore dst (MOVWload src mem) mem)
+       for {
+               if auxIntToInt64(v.AuxInt) != 2 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVWstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
+               v0.AddArg2(src, mem)
+               v.AddArg3(dst, v0, mem)
+               return true
+       }
+       // match: (Move [4] dst src mem)
+       // result: (MOVLstore dst (MOVLload src mem) mem)
+       for {
+               if auxIntToInt64(v.AuxInt) != 4 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVLstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
+               v0.AddArg2(src, mem)
+               v.AddArg3(dst, v0, mem)
+               return true
+       }
+       // match: (Move [8] dst src mem)
+       // result: (MOVQstore dst (MOVQload src mem) mem)
+       for {
+               if auxIntToInt64(v.AuxInt) != 8 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+               v0.AddArg2(src, mem)
+               v.AddArg3(dst, v0, mem)
+               return true
+       }
+       // match: (Move [16] dst src mem)
+       // result: (MOVOstore dst (MOVOload src mem) mem)
+       for {
+               if auxIntToInt64(v.AuxInt) != 16 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVOstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVOload, types.TypeInt128)
+               v0.AddArg2(src, mem)
+               v.AddArg3(dst, v0, mem)
+               return true
+       }
+       // match: (Move [3] dst src mem)
+       // result: (MOVBstore [2] dst (MOVBload [2] src mem) (MOVWstore dst (MOVWload src mem) mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 3 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = int32ToAuxInt(2)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
+               v0.AuxInt = int32ToAuxInt(2)
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVWstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [5] dst src mem)
+       // result: (MOVBstore [4] dst (MOVBload [4] src mem) (MOVLstore dst (MOVLload src mem) mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 5 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = int32ToAuxInt(4)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
+               v0.AuxInt = int32ToAuxInt(4)
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [6] dst src mem)
+       // result: (MOVWstore [4] dst (MOVWload [4] src mem) (MOVLstore dst (MOVLload src mem) mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 6 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = int32ToAuxInt(4)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
+               v0.AuxInt = int32ToAuxInt(4)
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [7] dst src mem)
+       // result: (MOVLstore [3] dst (MOVLload [3] src mem) (MOVLstore dst (MOVLload src mem) mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 7 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = int32ToAuxInt(3)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(3)
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVLstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [9] dst src mem)
+       // result: (MOVBstore [8] dst (MOVBload [8] src mem) (MOVQstore dst (MOVQload src mem) mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 9 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVBstore)
+               v.AuxInt = int32ToAuxInt(8)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBload, typ.UInt8)
+               v0.AuxInt = int32ToAuxInt(8)
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [10] dst src mem)
+       // result: (MOVWstore [8] dst (MOVWload [8] src mem) (MOVQstore dst (MOVQload src mem) mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 10 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVWstore)
+               v.AuxInt = int32ToAuxInt(8)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWload, typ.UInt16)
+               v0.AuxInt = int32ToAuxInt(8)
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [11] dst src mem)
+       // result: (MOVLstore [7] dst (MOVLload [7] src mem) (MOVQstore dst (MOVQload src mem) mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 11 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = int32ToAuxInt(7)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(7)
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [12] dst src mem)
+       // result: (MOVLstore [8] dst (MOVLload [8] src mem) (MOVQstore dst (MOVQload src mem) mem))
+       for {
+               if auxIntToInt64(v.AuxInt) != 12 {
+                       break
+               }
+               dst := v_0
+               src := v_1
+               mem := v_2
+               v.reset(OpAMD64MOVLstore)
+               v.AuxInt = int32ToAuxInt(8)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVLload, typ.UInt32)
+               v0.AuxInt = int32ToAuxInt(8)
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [s] dst src mem)
+       // cond: s >= 13 && s <= 15
+       // result: (MOVQstore [int32(s-8)] dst (MOVQload [int32(s-8)] src mem) (MOVQstore dst (MOVQload src mem) mem))
+       for {
+               s := auxIntToInt64(v.AuxInt)
+               dst := v_0
+               src := v_1
+               mem := v_2
+               if !(s >= 13 && s <= 15) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AuxInt = int32ToAuxInt(int32(s - 8))
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+               v0.AuxInt = int32ToAuxInt(int32(s - 8))
+               v0.AddArg2(src, mem)
+               v1 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+               v2.AddArg2(src, mem)
+               v1.AddArg3(dst, v2, mem)
+               v.AddArg3(dst, v0, v1)
+               return true
+       }
+       // match: (Move [s] dst src mem)
+       // cond: s > 16 && s < 192 && logLargeCopy(v, s)
+       // result: (LoweredMove [s] dst src mem)
+       for {
+               s := auxIntToInt64(v.AuxInt)
+               dst := v_0
+               src := v_1
+               mem := v_2
+               if !(s > 16 && s < 192 && logLargeCopy(v, s)) {
+                       break
+               }
+               v.reset(OpAMD64LoweredMove)
+               v.AuxInt = int64ToAuxInt(s)
+               v.AddArg3(dst, src, mem)
+               return true
+       }
+       // match: (Move [s] dst src mem)
+       // cond: s >= 192 && s <= repMoveThreshold && logLargeCopy(v, s)
+       // result: (LoweredMoveLoop [s] dst src mem)
+       for {
+               s := auxIntToInt64(v.AuxInt)
+               dst := v_0
+               src := v_1
+               mem := v_2
+               if !(s >= 192 && s <= repMoveThreshold && logLargeCopy(v, s)) {
+                       break
+               }
+               v.reset(OpAMD64LoweredMoveLoop)
+               v.AuxInt = int64ToAuxInt(s)
+               v.AddArg3(dst, src, mem)
+               return true
+       }
+       // match: (Move [s] dst src mem)
+       // cond: s > repMoveThreshold && s%8 != 0
+       // result: (Move [s-s%8] (OffPtr <dst.Type> dst [s%8]) (OffPtr <src.Type> src [s%8]) (MOVQstore dst (MOVQload src mem) mem))
+       for {
+               s := auxIntToInt64(v.AuxInt)
+               dst := v_0
+               src := v_1
+               mem := v_2
+               if !(s > repMoveThreshold && s%8 != 0) {
+                       break
+               }
+               v.reset(OpMove)
+               v.AuxInt = int64ToAuxInt(s - s%8)
+               v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
+               v0.AuxInt = int64ToAuxInt(s % 8)
+               v0.AddArg(dst)
+               v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
+               v1.AuxInt = int64ToAuxInt(s % 8)
+               v1.AddArg(src)
+               v2 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
+               v3 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
+               v3.AddArg2(src, mem)
+               v2.AddArg3(dst, v3, mem)
+               v.AddArg3(v0, v1, v2)
+               return true
+       }
+       // match: (Move [s] dst src mem)
+       // cond: s > repMoveThreshold && s%8 == 0 && logLargeCopy(v, s)
+       // result: (REPMOVSQ dst src (MOVQconst [s/8]) mem)
+       for {
+               s := auxIntToInt64(v.AuxInt)
+               dst := v_0
+               src := v_1
+               mem := v_2
+               if !(s > repMoveThreshold && s%8 == 0 && logLargeCopy(v, s)) {
+                       break
+               }
+               v.reset(OpAMD64REPMOVSQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(s / 8)
+               v.AddArg4(dst, src, v0, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpNeg32F(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint64x2 x y mask)
-       // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Neg32F x)
+       // result: (PXOR x (MOVSSconst <typ.Float32> [float32(math.Copysign(0, -1))]))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64PXOR)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVSSconst, typ.Float32)
+               v0.AuxInt = float32ToAuxInt(float32(math.Copysign(0, -1)))
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpNeg64F(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint64x4 x y mask)
-       // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Neg64F x)
+       // result: (PXOR x (MOVSDconst <typ.Float64> [math.Copysign(0, -1)]))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64PXOR)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVSDconst, typ.Float64)
+               v0.AuxInt = float64ToAuxInt(math.Copysign(0, -1))
+               v.AddArg2(x, v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNeq16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllLeftMaskedUint64x8 x y mask)
-       // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Neq16 x y)
+       // result: (SETNE (CMPW x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPW, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNeq32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt16x16 [a] x y mask)
-       // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Neq32 x y)
+       // result: (SETNE (CMPL x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPL, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNeq32F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt16x32 [a] x y mask)
-       // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Neq32F x y)
+       // result: (SETNEF (UCOMISS x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETNEF)
+               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISS, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNeq64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt16x8 [a] x y mask)
-       // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Neq64 x y)
+       // result: (SETNE (CMPQ x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNeq64F(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt32x16 [a] x y mask)
-       // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Neq64F x y)
+       // result: (SETNEF (UCOMISD x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETNEF)
+               v0 := b.NewValue0(v.Pos, OpAMD64UCOMISD, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNeq8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt32x4 [a] x y mask)
-       // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Neq8 x y)
+       // result: (SETNE (CMPB x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNeqB(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt32x8 [a] x y mask)
-       // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (NeqB x y)
+       // result: (SETNE (CMPB x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPB, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNeqPtr(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt64x2 [a] x y mask)
-       // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (NeqPtr x y)
+       // result: (SETNE (CMPQ x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SETNE)
+               v0 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpNot(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt64x4 [a] x y mask)
-       // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Not x)
+       // result: (XORLconst [1] x)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64XORLconst)
+               v.AuxInt = int32ToAuxInt(1)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedInt64x8 [a] x y mask)
-       // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualFloat32x16 x y)
+       // result: (VPMOVMToVec32x16 (VCMPPS512 [4] x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualFloat32x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint16x16 [a] x y mask)
-       // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (NotEqualFloat32x4 x y)
+       // result: (VCMPPS128 [4] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPS128)
+               v.AuxInt = uint8ToAuxInt(4)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint16x32 [a] x y mask)
-       // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (NotEqualFloat32x8 x y)
+       // result: (VCMPPS256 [4] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPS256)
+               v.AuxInt = uint8ToAuxInt(4)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualFloat64x2(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint16x8 [a] x y mask)
-       // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (NotEqualFloat64x2 x y)
+       // result: (VCMPPD128 [4] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDWMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPD128)
+               v.AuxInt = uint8ToAuxInt(4)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint32x16 [a] x y mask)
-       // result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (NotEqualFloat64x4 x y)
+       // result: (VCMPPD256 [4] x y)
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDDMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VCMPPD256)
+               v.AuxInt = uint8ToAuxInt(4)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint32x4 [a] x y mask)
-       // result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualFloat64x8 x y)
+       // result: (VPMOVMToVec64x8 (VCMPPD512 [4] x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDDMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint32x8 [a] x y mask)
-       // result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualInt16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPW512 [4] x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDDMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint64x2 [a] x y mask)
-       // result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualInt32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPD512 [4] x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDQMasked128)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint64x4 [a] x y mask)
-       // result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualInt64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPQ512 [4] x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDQMasked256)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightConcatMaskedUint64x8 [a] x y mask)
-       // result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualInt8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPB512 [4] x y))
        for {
-               a := auxIntToUint8(v.AuxInt)
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSHRDQMasked512)
-               v.AuxInt = uint8ToAuxInt(a)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt16x16 x y mask)
-       // result: (VPSRAWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualUint16x32 x y)
+       // result: (VPMOVMToVec16x32 (VPCMPUW512 [4] x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec16x32)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt16x32 x y mask)
-       // result: (VPSRAWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualUint32x16 x y)
+       // result: (VPMOVMToVec32x16 (VPCMPUD512 [4] x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec32x16)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualUint64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt16x8 x y mask)
-       // result: (VPSRAWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualUint64x8 x y)
+       // result: (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec64x8)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpNotEqualUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt32x16 x y mask)
-       // result: (VPSRADMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (NotEqualUint8x64 x y)
+       // result: (VPMOVMToVec8x64 (VPCMPUB512 [4] x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRADMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VPMOVMToVec8x64)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
+               v0.AuxInt = uint8ToAuxInt(4)
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpOffPtr(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt32x4 x y mask)
-       // result: (VPSRADMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (OffPtr [off] ptr)
+       // cond: is32Bit(off)
+       // result: (ADDQconst [int32(off)] ptr)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRADMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               off := auxIntToInt64(v.AuxInt)
+               ptr := v_0
+               if !(is32Bit(off)) {
+                       break
+               }
+               v.reset(OpAMD64ADDQconst)
+               v.AuxInt = int32ToAuxInt(int32(off))
+               v.AddArg(ptr)
+               return true
+       }
+       // match: (OffPtr [off] ptr)
+       // result: (ADDQ (MOVQconst [off]) ptr)
+       for {
+               off := auxIntToInt64(v.AuxInt)
+               ptr := v_0
+               v.reset(OpAMD64ADDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(off)
+               v.AddArg2(v0, ptr)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpPopCount16(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt32x8 x y mask)
-       // result: (VPSRADMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (PopCount16 x)
+       // result: (POPCNTL (MOVWQZX <typ.UInt32> x))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRADMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64POPCNTL)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVWQZX, typ.UInt32)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpPopCount8(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftAllRightMaskedInt64x2 x y mask)
-       // result: (VPSRAQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (PopCount8 x)
+       // result: (POPCNTL (MOVBQZX <typ.UInt32> x))
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64POPCNTL)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVBQZX, typ.UInt32)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEven(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedInt64x4 x y mask)
-       // result: (VPSRAQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (RoundToEven x)
+       // result: (ROUNDSD [0] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64ROUNDSD)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedInt64x8 x y mask)
-       // result: (VPSRAQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (RoundToEvenFloat32x4 x)
+       // result: (VROUNDPS128 [0] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VROUNDPS128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint16x16 x y mask)
-       // result: (VPSRLWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (RoundToEvenFloat32x8 x)
+       // result: (VROUNDPS256 [0] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VROUNDPS256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenFloat64x2(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint16x32 x y mask)
-       // result: (VPSRLWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (RoundToEvenFloat64x2 x)
+       // result: (VROUNDPD128 [0] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VROUNDPD128)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint16x8 x y mask)
-       // result: (VPSRLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (RoundToEvenFloat64x4 x)
+       // result: (VROUNDPD256 [0] x)
        for {
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VROUNDPD256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint32x16 x y mask)
-       // result: (VPSRLDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledFloat32x16 [a] x)
+       // result: (VRNDSCALEPS512 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS512)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint32x4 x y mask)
-       // result: (VPSRLDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledFloat32x4 [a] x)
+       // result: (VRNDSCALEPS128 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS128)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint32x8 x y mask)
-       // result: (VPSRLDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledFloat32x8 [a] x)
+       // result: (VRNDSCALEPS256 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPS256)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledFloat64x2(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint64x2 x y mask)
-       // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledFloat64x2 [a] x)
+       // result: (VRNDSCALEPD128 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD128)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint64x4 x y mask)
-       // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledFloat64x4 [a] x)
+       // result: (VRNDSCALEPD256 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD256)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledFloat64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftAllRightMaskedUint64x8 x y mask)
-       // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledFloat64x8 [a] x)
+       // result: (VRNDSCALEPD512 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VRNDSCALEPD512)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedInt16x16 x y z mask)
-       // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledResidueFloat32x16 [a] x)
+       // result: (VREDUCEPS512 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VREDUCEPS512)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedInt16x32 x y z mask)
-       // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledResidueFloat32x4 [a] x)
+       // result: (VREDUCEPS128 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VREDUCEPS128)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat32x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedInt16x8 x y z mask)
-       // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledResidueFloat32x8 [a] x)
+       // result: (VREDUCEPS256 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VREDUCEPS256)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x2(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedInt32x16 x y z mask)
-       // result: (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledResidueFloat64x2 [a] x)
+       // result: (VREDUCEPD128 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VREDUCEPD128)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x4(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedInt32x4 x y z mask)
-       // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledResidueFloat64x4 [a] x)
+       // result: (VREDUCEPD256 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VREDUCEPD256)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpRoundToEvenScaledResidueFloat64x8(v *Value) bool {
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedInt32x8 x y z mask)
-       // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (RoundToEvenScaledResidueFloat64x8 [a] x)
+       // result: (VREDUCEPD512 [a+0] x)
        for {
+               a := auxIntToUint8(v.AuxInt)
                x := v_0
-               y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               v.reset(OpAMD64VREDUCEPD512)
+               v.AuxInt = uint8ToAuxInt(a + 0)
+               v.AddArg(x)
                return true
        }
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh16Ux16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftConcatMaskedInt64x2 x y z mask)
-       // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Rsh16Ux16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPWconst y [16])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v2.AuxInt = int16ToAuxInt(16)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedInt64x4 x y z mask)
-       // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Rsh16Ux16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRW x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRW)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedInt64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh16Ux32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftConcatMaskedInt64x8 x y z mask)
-       // result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Rsh16Ux32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPLconst y [16])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(16)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedUint16x16 x y z mask)
-       // result: (VPSHLDVWMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Rsh16Ux32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRW x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRW)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh16Ux64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftConcatMaskedUint16x32 x y z mask)
-       // result: (VPSHLDVWMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Rsh16Ux64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPQconst y [16])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(16)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint16x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedUint16x8 x y z mask)
-       // result: (VPSHLDVWMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Rsh16Ux64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRW x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRW)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh16Ux8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftConcatMaskedUint32x16 x y z mask)
-       // result: (VPSHLDVDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Rsh16Ux8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMPBconst y [16])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRW, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v2.AuxInt = int8ToAuxInt(16)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedUint32x4 x y z mask)
-       // result: (VPSHLDVDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Rsh16Ux8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRW x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRW)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftConcatMaskedUint32x8 x y z mask)
-       // result: (VPSHLDVDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Rsh16x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [16])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARW)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v3.AuxInt = int16ToAuxInt(16)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedUint64x2 x y z mask)
-       // result: (VPSHLDVQMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Rsh16x16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARW x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARW)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftConcatMaskedUint64x4 x y z mask)
-       // result: (VPSHLDVQMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Rsh16x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [16])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARW)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v3.AuxInt = int32ToAuxInt(16)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftConcatMaskedUint64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftConcatMaskedUint64x8 x y z mask)
-       // result: (VPSHLDVQMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Rsh16x32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARW x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHLDVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARW)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh16x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedInt16x16 x y mask)
-       // result: (VPSLLVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Rsh16x64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARW <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [16])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARW)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v3.AuxInt = int32ToAuxInt(16)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedInt16x32 x y mask)
-       // result: (VPSLLVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Rsh16x64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARW x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARW)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh16x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedInt16x8 x y mask)
-       // result: (VPSLLVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Rsh16x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARW <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [16])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARW)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v3.AuxInt = int8ToAuxInt(16)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedInt32x16 x y mask)
-       // result: (VPSLLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Rsh16x8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARW x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARW)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh32Ux16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedInt32x4 x y mask)
-       // result: (VPSLLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Rsh32Ux16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPWconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v2.AuxInt = int16ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedInt32x8 x y mask)
-       // result: (VPSLLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Rsh32Ux16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRL x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh32Ux32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedInt64x2 x y mask)
-       // result: (VPSLLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Rsh32Ux32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPLconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedInt64x4 x y mask)
-       // result: (VPSLLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Rsh32Ux32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRL x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh32Ux64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedInt64x8 x y mask)
-       // result: (VPSLLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Rsh32Ux64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPQconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedUint16x16 x y mask)
-       // result: (VPSLLVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Rsh32Ux64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRL x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh32Ux8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedUint16x32 x y mask)
-       // result: (VPSLLVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Rsh32Ux8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMPBconst y [32])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRL, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v2.AuxInt = int8ToAuxInt(32)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedUint16x8 x y mask)
-       // result: (VPSLLVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Rsh32Ux8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRL x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedUint32x16 x y mask)
-       // result: (VPSLLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Rsh32x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [32])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v3.AuxInt = int16ToAuxInt(32)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedUint32x4 x y mask)
-       // result: (VPSLLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Rsh32x16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARL x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh32x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedUint32x8 x y mask)
-       // result: (VPSLLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Rsh32x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [32])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v3.AuxInt = int32ToAuxInt(32)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedUint64x2 x y mask)
-       // result: (VPSLLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Rsh32x32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARL x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftLeftMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh32x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftLeftMaskedUint64x4 x y mask)
-       // result: (VPSLLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Rsh32x64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARL <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [32])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v3.AuxInt = int32ToAuxInt(32)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftLeftMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftLeftMaskedUint64x8 x y mask)
-       // result: (VPSLLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Rsh32x64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARL x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSLLVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt16x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedInt16x16 x y z mask)
-       // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Rsh32x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARL <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [32])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v3.AuxInt = int8ToAuxInt(32)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt16x32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedInt16x32 x y z mask)
-       // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Rsh32x8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARL x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARL)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt16x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh64Ux16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedInt16x8 x y z mask)
-       // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Rsh64Ux16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPWconst y [64])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v2.AuxInt = int16ToAuxInt(64)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedInt32x16 x y z mask)
-       // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Rsh64Ux16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRQ x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh64Ux32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedInt32x4 x y z mask)
-       // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Rsh64Ux32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPLconst y [64])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(64)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedInt32x8 x y z mask)
-       // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Rsh64Ux32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRQ x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh64Ux64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedInt64x2 x y z mask)
-       // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Rsh64Ux64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPQconst y [64])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(64)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedInt64x4 x y z mask)
-       // result: (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Rsh64Ux64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRQ x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedInt64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh64Ux8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedInt64x8 x y z mask)
-       // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Rsh64Ux8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMPBconst y [64])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQ)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRQ, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v2.AuxInt = int8ToAuxInt(64)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint16x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedUint16x16 x y z mask)
-       // result: (VPSHRDVWMasked256 x y z (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Rsh64Ux8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRQ x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint16x32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh64x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedUint16x32 x y z mask)
-       // result: (VPSHRDVWMasked512 x y z (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Rsh64x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [64])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v3.AuxInt = int16ToAuxInt(64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint16x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedUint16x8 x y z mask)
-       // result: (VPSHRDVWMasked128 x y z (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Rsh64x16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARQ x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint32x16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh64x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedUint32x16 x y z mask)
-       // result: (VPSHRDVDMasked512 x y z (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Rsh64x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [64])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v3.AuxInt = int32ToAuxInt(64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint32x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedUint32x4 x y z mask)
-       // result: (VPSHRDVDMasked128 x y z (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Rsh64x32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARQ x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint32x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh64x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedUint32x8 x y z mask)
-       // result: (VPSHRDVDMasked256 x y z (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Rsh64x64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARQ <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [64])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v3.AuxInt = int32ToAuxInt(64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint64x2(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedUint64x2 x y z mask)
-       // result: (VPSHRDVQMasked128 x y z (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Rsh64x64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARQ x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint64x4(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightConcatMaskedUint64x4 x y z mask)
-       // result: (VPSHRDVQMasked256 x y z (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Rsh64x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARQ <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [64])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v3.AuxInt = int8ToAuxInt(64)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightConcatMaskedUint64x8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightConcatMaskedUint64x8 x y z mask)
-       // result: (VPSHRDVQMasked512 x y z (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Rsh64x8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARQ x y)
        for {
                x := v_0
                y := v_1
-               z := v_2
-               mask := v_3
-               v.reset(OpAMD64VPSHRDVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(x, y, z, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARQ)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh8Ux16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedInt16x16 x y mask)
-       // result: (VPSRAVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Rsh8Ux16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPWconst y [8])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v2.AuxInt = int16ToAuxInt(8)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedInt16x32 x y mask)
-       // result: (VPSRAVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Rsh8Ux16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRB x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRB)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh8Ux32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedInt16x8 x y mask)
-       // result: (VPSRAVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Rsh8Ux32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPLconst y [8])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(8)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedInt32x16 x y mask)
-       // result: (VPSRAVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Rsh8Ux32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRB x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRB)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh8Ux64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedInt32x4 x y mask)
-       // result: (VPSRAVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Rsh8Ux64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPQconst y [8])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v2.AuxInt = int32ToAuxInt(8)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedInt32x8 x y mask)
-       // result: (VPSRAVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Rsh8Ux64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRB x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRB)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh8Ux8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedInt64x2 x y mask)
-       // result: (VPSRAVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Rsh8Ux8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMPBconst y [8])))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64ANDL)
+               v0 := b.NewValue0(v.Pos, OpAMD64SHRB, t)
+               v0.AddArg2(x, y)
+               v1 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, t)
+               v2 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v2.AuxInt = int8ToAuxInt(8)
+               v2.AddArg(y)
+               v1.AddArg(v2)
+               v.AddArg2(v0, v1)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedInt64x4 x y mask)
-       // result: (VPSRAVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Rsh8Ux8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SHRB x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SHRB)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh8x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedInt64x8 x y mask)
-       // result: (VPSRAVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Rsh8x16 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPWconst y [8])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRAVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARB)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPWconst, types.TypeFlags)
+               v3.AuxInt = int16ToAuxInt(8)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedUint16x16 x y mask)
-       // result: (VPSRLVWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (Rsh8x16 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARB x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARB)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedUint16x32 x y mask)
-       // result: (VPSRLVWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (Rsh8x32 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPLconst y [8])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARB)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPLconst, types.TypeFlags)
+               v3.AuxInt = int32ToAuxInt(8)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedUint16x8 x y mask)
-       // result: (VPSRLVWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (Rsh8x32 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARB x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARB)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedUint32x16 x y mask)
-       // result: (VPSRLVDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Rsh8x64 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARB <t> x (ORQ <y.Type> y (NOTQ <y.Type> (SBBQcarrymask <y.Type> (CMPQconst y [8])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARB)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORQ, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTQ, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+               v3.AuxInt = int32ToAuxInt(8)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedUint32x4 x y mask)
-       // result: (VPSRLVDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Rsh8x64 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARB x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARB)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpRsh8x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedUint32x8 x y mask)
-       // result: (VPSRLVDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Rsh8x8 <t> x y)
+       // cond: !shiftIsBounded(v)
+       // result: (SARB <t> x (ORL <y.Type> y (NOTL <y.Type> (SBBLcarrymask <y.Type> (CMPBconst y [8])))))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(!shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARB)
+               v.Type = t
+               v0 := b.NewValue0(v.Pos, OpAMD64ORL, y.Type)
+               v1 := b.NewValue0(v.Pos, OpAMD64NOTL, y.Type)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBLcarrymask, y.Type)
+               v3 := b.NewValue0(v.Pos, OpAMD64CMPBconst, types.TypeFlags)
+               v3.AuxInt = int8ToAuxInt(8)
+               v3.AddArg(y)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg2(y, v1)
+               v.AddArg2(x, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedUint64x2 x y mask)
-       // result: (VPSRLVQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Rsh8x8 x y)
+       // cond: shiftIsBounded(v)
+       // result: (SARB x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if !(shiftIsBounded(v)) {
+                       break
+               }
+               v.reset(OpAMD64SARB)
+               v.AddArg2(x, y)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpShiftRightMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpSelect0(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (ShiftRightMaskedUint64x4 x y mask)
-       // result: (VPSRLVQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Select0 (Mul64uover x y))
+       // result: (Select0 <typ.UInt64> (MULQU x y))
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_0.Op != OpMul64uover {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               v.reset(OpSelect0)
+               v.Type = typ.UInt64
+               v0 := b.NewValue0(v.Pos, OpAMD64MULQU, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpShiftRightMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (ShiftRightMaskedUint64x8 x y mask)
-       // result: (VPSRLVQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Select0 (Mul32uover x y))
+       // result: (Select0 <typ.UInt32> (MULLU x y))
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSRLVQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               if v_0.Op != OpMul32uover {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               v.reset(OpSelect0)
+               v.Type = typ.UInt32
+               v0 := b.NewValue0(v.Pos, OpAMD64MULLU, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpSlicemask(v *Value) bool {
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Slicemask <t> x)
-       // result: (SARQconst (NEGQ <t> x) [63])
+       // match: (Select0 (Add64carry x y c))
+       // result: (Select0 <typ.UInt64> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
        for {
-               t := v.Type
-               x := v_0
-               v.reset(OpAMD64SARQconst)
-               v.AuxInt = int8ToAuxInt(63)
-               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
-               v0.AddArg(x)
+               if v_0.Op != OpAdd64carry {
+                       break
+               }
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpSelect0)
+               v.Type = typ.UInt64
+               v0 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v2.AddArg(c)
+               v1.AddArg(v2)
+               v0.AddArg3(x, y, v1)
                v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpSpectreIndex(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (SpectreIndex <t> x y)
-       // result: (CMOVQCC x (MOVQconst [0]) (CMPQ x y))
+       // match: (Select0 (Sub64borrow x y c))
+       // result: (Select0 <typ.UInt64> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64CMOVQCC)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v0.AuxInt = int64ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v1.AddArg2(x, y)
-               v.AddArg3(x, v0, v1)
+               if v_0.Op != OpSub64borrow {
+                       break
+               }
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpSelect0)
+               v.Type = typ.UInt64
+               v0 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v2 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v2.AddArg(c)
+               v1.AddArg(v2)
+               v0.AddArg3(x, y, v1)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpSpectreSliceIndex(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (SpectreSliceIndex <t> x y)
-       // result: (CMOVQHI x (MOVQconst [0]) (CMPQ x y))
+       // match: (Select0 <t> (AddTupleFirst32 val tuple))
+       // result: (ADDL val (Select0 <t> tuple))
        for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64CMOVQHI)
-               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
-               v0.AuxInt = int64ToAuxInt(0)
-               v1 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
-               v1.AddArg2(x, y)
-               v.AddArg3(x, v0, v1)
+               t := v.Type
+               if v_0.Op != OpAMD64AddTupleFirst32 {
+                       break
+               }
+               tuple := v_0.Args[1]
+               val := v_0.Args[0]
+               v.reset(OpAMD64ADDL)
+               v0 := b.NewValue0(v.Pos, OpSelect0, t)
+               v0.AddArg(tuple)
+               v.AddArg2(val, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpSqrtMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (SqrtMaskedFloat32x16 x mask)
-       // result: (VSQRTPSMasked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Select0 <t> (AddTupleFirst64 val tuple))
+       // result: (ADDQ val (Select0 <t> tuple))
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VSQRTPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               t := v.Type
+               if v_0.Op != OpAMD64AddTupleFirst64 {
+                       break
+               }
+               tuple := v_0.Args[1]
+               val := v_0.Args[0]
+               v.reset(OpAMD64ADDQ)
+               v0 := b.NewValue0(v.Pos, OpSelect0, t)
+               v0.AddArg(tuple)
+               v.AddArg2(val, v0)
                return true
        }
-}
-func rewriteValueAMD64_OpSqrtMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (SqrtMaskedFloat32x4 x mask)
-       // result: (VSQRTPSMasked128 x (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (Select0 a:(ADDQconstflags [c] x))
+       // cond: a.Uses == 1
+       // result: (ADDQconst [c] x)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VSQRTPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               a := v_0
+               if a.Op != OpAMD64ADDQconstflags {
+                       break
+               }
+               c := auxIntToInt32(a.AuxInt)
+               x := a.Args[0]
+               if !(a.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64ADDQconst)
+               v.AuxInt = int32ToAuxInt(c)
+               v.AddArg(x)
                return true
        }
-}
-func rewriteValueAMD64_OpSqrtMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (SqrtMaskedFloat32x8 x mask)
-       // result: (VSQRTPSMasked256 x (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (Select0 a:(ADDLconstflags [c] x))
+       // cond: a.Uses == 1
+       // result: (ADDLconst [c] x)
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VSQRTPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               a := v_0
+               if a.Op != OpAMD64ADDLconstflags {
+                       break
+               }
+               c := auxIntToInt32(a.AuxInt)
+               x := a.Args[0]
+               if !(a.Uses == 1) {
+                       break
+               }
+               v.reset(OpAMD64ADDLconst)
+               v.AuxInt = int32ToAuxInt(c)
+               v.AddArg(x)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSqrtMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpSelect1(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SqrtMaskedFloat64x2 x mask)
-       // result: (VSQRTPDMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (Select1 (Mul64uover x y))
+       // result: (SETO (Select1 <types.TypeFlags> (MULQU x y)))
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VSQRTPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               if v_0.Op != OpMul64uover {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETO)
+               v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v1 := b.NewValue0(v.Pos, OpAMD64MULQU, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v1.AddArg2(x, y)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpSqrtMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (SqrtMaskedFloat64x4 x mask)
-       // result: (VSQRTPDMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (Select1 (Mul32uover x y))
+       // result: (SETO (Select1 <types.TypeFlags> (MULLU x y)))
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VSQRTPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               if v_0.Op != OpMul32uover {
+                       break
+               }
+               y := v_0.Args[1]
+               x := v_0.Args[0]
+               v.reset(OpAMD64SETO)
+               v0 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v1 := b.NewValue0(v.Pos, OpAMD64MULLU, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v1.AddArg2(x, y)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpSqrtMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (SqrtMaskedFloat64x8 x mask)
-       // result: (VSQRTPDMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (Select1 (Add64carry x y c))
+       // result: (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
        for {
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VSQRTPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
+               if v_0.Op != OpAdd64carry {
+                       break
+               }
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpAMD64NEGQ)
+               v.Type = typ.UInt64
+               v0 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, typ.UInt64)
+               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v2 := b.NewValue0(v.Pos, OpAMD64ADCQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v4 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v4.AddArg(c)
+               v3.AddArg(v4)
+               v2.AddArg3(x, y, v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-}
-func rewriteValueAMD64_OpStore(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 8 && t.IsFloat()
-       // result: (MOVSDstore ptr val mem)
+       // match: (Select1 (Sub64borrow x y c))
+       // result: (NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 8 && t.IsFloat()) {
+               if v_0.Op != OpSub64borrow {
                        break
                }
-               v.reset(OpAMD64MOVSDstore)
-               v.AddArg3(ptr, val, mem)
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpAMD64NEGQ)
+               v.Type = typ.UInt64
+               v0 := b.NewValue0(v.Pos, OpAMD64SBBQcarrymask, typ.UInt64)
+               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v2 := b.NewValue0(v.Pos, OpAMD64SBBQ, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v4 := b.NewValue0(v.Pos, OpAMD64NEGLflags, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v4.AddArg(c)
+               v3.AddArg(v4)
+               v2.AddArg3(x, y, v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v.AddArg(v0)
                return true
        }
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 4 && t.IsFloat()
-       // result: (MOVSSstore ptr val mem)
+       // match: (Select1 (NEGLflags (MOVQconst [0])))
+       // result: (FlagEQ)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 4 && t.IsFloat()) {
+               if v_0.Op != OpAMD64NEGLflags {
                        break
                }
-               v.reset(OpAMD64MOVSSstore)
-               v.AddArg3(ptr, val, mem)
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_0_0.AuxInt) != 0 {
+                       break
+               }
+               v.reset(OpAMD64FlagEQ)
                return true
        }
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 8 && !t.IsFloat()
-       // result: (MOVQstore ptr val mem)
+       // match: (Select1 (NEGLflags (NEGQ (SBBQcarrymask x))))
+       // result: x
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 8 && !t.IsFloat()) {
+               if v_0.Op != OpAMD64NEGLflags {
                        break
                }
-               v.reset(OpAMD64MOVQstore)
-               v.AddArg3(ptr, val, mem)
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpAMD64NEGQ {
+                       break
+               }
+               v_0_0_0 := v_0_0.Args[0]
+               if v_0_0_0.Op != OpAMD64SBBQcarrymask {
+                       break
+               }
+               x := v_0_0_0.Args[0]
+               v.copyOf(x)
                return true
        }
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 4 && !t.IsFloat()
-       // result: (MOVLstore ptr val mem)
+       // match: (Select1 (AddTupleFirst32 _ tuple))
+       // result: (Select1 tuple)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 4 && !t.IsFloat()) {
+               if v_0.Op != OpAMD64AddTupleFirst32 {
                        break
                }
-               v.reset(OpAMD64MOVLstore)
-               v.AddArg3(ptr, val, mem)
+               tuple := v_0.Args[1]
+               v.reset(OpSelect1)
+               v.AddArg(tuple)
                return true
        }
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 2
-       // result: (MOVWstore ptr val mem)
+       // match: (Select1 (AddTupleFirst64 _ tuple))
+       // result: (Select1 tuple)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 2) {
+               if v_0.Op != OpAMD64AddTupleFirst64 {
                        break
                }
-               v.reset(OpAMD64MOVWstore)
-               v.AddArg3(ptr, val, mem)
+               tuple := v_0.Args[1]
+               v.reset(OpSelect1)
+               v.AddArg(tuple)
                return true
        }
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 1
-       // result: (MOVBstore ptr val mem)
+       // match: (Select1 a:(LoweredAtomicAnd64 ptr val mem))
+       // cond: a.Uses == 1 && clobber(a)
+       // result: (ANDQlock ptr val mem)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 1) {
+               a := v_0
+               if a.Op != OpAMD64LoweredAtomicAnd64 {
                        break
                }
-               v.reset(OpAMD64MOVBstore)
+               mem := a.Args[2]
+               ptr := a.Args[0]
+               val := a.Args[1]
+               if !(a.Uses == 1 && clobber(a)) {
+                       break
+               }
+               v.reset(OpAMD64ANDQlock)
                v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 16
-       // result: (VMOVDQUstore128 ptr val mem)
+       // match: (Select1 a:(LoweredAtomicAnd32 ptr val mem))
+       // cond: a.Uses == 1 && clobber(a)
+       // result: (ANDLlock ptr val mem)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 16) {
+               a := v_0
+               if a.Op != OpAMD64LoweredAtomicAnd32 {
                        break
                }
-               v.reset(OpAMD64VMOVDQUstore128)
+               mem := a.Args[2]
+               ptr := a.Args[0]
+               val := a.Args[1]
+               if !(a.Uses == 1 && clobber(a)) {
+                       break
+               }
+               v.reset(OpAMD64ANDLlock)
                v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 32
-       // result: (VMOVDQUstore256 ptr val mem)
+       // match: (Select1 a:(LoweredAtomicOr64 ptr val mem))
+       // cond: a.Uses == 1 && clobber(a)
+       // result: (ORQlock ptr val mem)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 32) {
+               a := v_0
+               if a.Op != OpAMD64LoweredAtomicOr64 {
                        break
                }
-               v.reset(OpAMD64VMOVDQUstore256)
+               mem := a.Args[2]
+               ptr := a.Args[0]
+               val := a.Args[1]
+               if !(a.Uses == 1 && clobber(a)) {
+                       break
+               }
+               v.reset(OpAMD64ORQlock)
                v.AddArg3(ptr, val, mem)
                return true
        }
-       // match: (Store {t} ptr val mem)
-       // cond: t.Size() == 64
-       // result: (VMOVDQUstore512 ptr val mem)
+       // match: (Select1 a:(LoweredAtomicOr32 ptr val mem))
+       // cond: a.Uses == 1 && clobber(a)
+       // result: (ORLlock ptr val mem)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               if !(t.Size() == 64) {
+               a := v_0
+               if a.Op != OpAMD64LoweredAtomicOr32 {
                        break
                }
-               v.reset(OpAMD64VMOVDQUstore512)
+               mem := a.Args[2]
+               ptr := a.Args[0]
+               val := a.Args[1]
+               if !(a.Uses == 1 && clobber(a)) {
+                       break
+               }
+               v.reset(OpAMD64ORLlock)
                v.AddArg3(ptr, val, mem)
                return true
        }
        return false
 }
-func rewriteValueAMD64_OpStoreMask16x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpSelectN(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (StoreMask16x16 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec16x16ToM <t> val) mem)
+       config := b.Func.Config
+       // match: (SelectN [0] call:(CALLstatic {sym} s1:(MOVQstoreconst _ [sc] s2:(MOVQstore _ src s3:(MOVQstore _ dst mem)))))
+       // cond: sc.Val64() >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sc.Val64(), config) && clobber(s1, s2, s3, call)
+       // result: (Move [sc.Val64()] dst src mem)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               if auxIntToInt64(v.AuxInt) != 0 {
+                       break
+               }
+               call := v_0
+               if call.Op != OpAMD64CALLstatic || len(call.Args) != 1 {
+                       break
+               }
+               sym := auxToCall(call.Aux)
+               s1 := call.Args[0]
+               if s1.Op != OpAMD64MOVQstoreconst {
+                       break
+               }
+               sc := auxIntToValAndOff(s1.AuxInt)
+               _ = s1.Args[1]
+               s2 := s1.Args[1]
+               if s2.Op != OpAMD64MOVQstore {
+                       break
+               }
+               _ = s2.Args[2]
+               src := s2.Args[1]
+               s3 := s2.Args[2]
+               if s3.Op != OpAMD64MOVQstore {
+                       break
+               }
+               mem := s3.Args[2]
+               dst := s3.Args[1]
+               if !(sc.Val64() >= 0 && isSameCall(sym, "runtime.memmove") && s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1 && isInlinableMemmove(dst, src, sc.Val64(), config) && clobber(s1, s2, s3, call)) {
+                       break
+               }
+               v.reset(OpMove)
+               v.AuxInt = int64ToAuxInt(sc.Val64())
+               v.AddArg3(dst, src, mem)
+               return true
+       }
+       // match: (SelectN [0] call:(CALLstatic {sym} dst src (MOVQconst [sz]) mem))
+       // cond: sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call)
+       // result: (Move [sz] dst src mem)
+       for {
+               if auxIntToInt64(v.AuxInt) != 0 {
+                       break
+               }
+               call := v_0
+               if call.Op != OpAMD64CALLstatic || len(call.Args) != 4 {
+                       break
+               }
+               sym := auxToCall(call.Aux)
+               mem := call.Args[3]
+               dst := call.Args[0]
+               src := call.Args[1]
+               call_2 := call.Args[2]
+               if call_2.Op != OpAMD64MOVQconst {
+                       break
+               }
+               sz := auxIntToInt64(call_2.AuxInt)
+               if !(sz >= 0 && isSameCall(sym, "runtime.memmove") && call.Uses == 1 && isInlinableMemmove(dst, src, sz, config) && clobber(call)) {
+                       break
+               }
+               v.reset(OpMove)
+               v.AuxInt = int64ToAuxInt(sz)
+               v.AddArg3(dst, src, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpStoreMask16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask16x32 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec16x32ToM <t> val) mem)
+       // match: (SetHiFloat32x16 x y)
+       // result: (VINSERTF64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTF64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask16x8 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec16x8ToM <t> val) mem)
+       // match: (SetHiFloat32x8 x y)
+       // result: (VINSERTF128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTF128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask32x16 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec32x16ToM <t> val) mem)
+       // match: (SetHiFloat64x4 x y)
+       // result: (VINSERTF128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTF128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask32x4 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec32x4ToM <t> val) mem)
+       // match: (SetHiFloat64x8 x y)
+       // result: (VINSERTF64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTF64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiInt16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask32x8 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec32x8ToM <t> val) mem)
+       // match: (SetHiInt16x16 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask64x2 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec64x2ToM <t> val) mem)
+       // match: (SetHiInt16x32 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask64x4 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec64x4ToM <t> val) mem)
+       // match: (SetHiInt32x16 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiInt32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask64x8 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec64x8ToM <t> val) mem)
+       // match: (SetHiInt32x8 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask8x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiInt64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask8x16 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec8x16ToM <t> val) mem)
+       // match: (SetHiInt64x4 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask8x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask8x32 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec8x32ToM <t> val) mem)
+       // match: (SetHiInt64x8 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMask8x64(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiInt8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMask8x64 {t} ptr val mem)
-       // result: (KMOVQstore ptr (VPMOVVec8x64ToM <t> val) mem)
+       // match: (SetHiInt8x32 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               val := v_1
-               mem := v_2
-               v.reset(OpAMD64KMOVQstore)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, t)
-               v0.AddArg(val)
-               v.AddArg3(ptr, v0, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpStoreMasked16(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMasked16 {t} ptr mask val mem)
-       // cond: t.Size() == 64
-       // result: (VPMASK16store512 ptr (VPMOVVec16x32ToM <types.TypeMask> mask) val mem)
+       // match: (SetHiInt8x64 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mask := v_1
-               val := v_2
-               mem := v_3
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK16store512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(ptr, v0, val, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpStoreMasked32(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiUint16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMasked32 {t} ptr mask val mem)
-       // cond: t.Size() == 16
-       // result: (VPMASK32store128 ptr mask val mem)
+       // match: (SetHiUint16x16 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mask := v_1
-               val := v_2
-               mem := v_3
-               if !(t.Size() == 16) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK32store128)
-               v.AddArg4(ptr, mask, val, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (StoreMasked32 {t} ptr mask val mem)
-       // cond: t.Size() == 32
-       // result: (VPMASK32store256 ptr mask val mem)
+}
+func rewriteValueAMD64_OpSetHiUint16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiUint16x32 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mask := v_1
-               val := v_2
-               mem := v_3
-               if !(t.Size() == 32) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK32store256)
-               v.AddArg4(ptr, mask, val, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (StoreMasked32 {t} ptr mask val mem)
-       // cond: t.Size() == 64
-       // result: (VPMASK32store512 ptr (VPMOVVec32x16ToM <types.TypeMask> mask) val mem)
+}
+func rewriteValueAMD64_OpSetHiUint32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiUint32x16 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mask := v_1
-               val := v_2
-               mem := v_3
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK32store512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(ptr, v0, val, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpStoreMasked64(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiUint32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMasked64 {t} ptr mask val mem)
-       // cond: t.Size() == 16
-       // result: (VPMASK64store128 ptr mask val mem)
+       // match: (SetHiUint32x8 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mask := v_1
-               val := v_2
-               mem := v_3
-               if !(t.Size() == 16) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK64store128)
-               v.AddArg4(ptr, mask, val, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (StoreMasked64 {t} ptr mask val mem)
-       // cond: t.Size() == 32
-       // result: (VPMASK64store256 ptr mask val mem)
+}
+func rewriteValueAMD64_OpSetHiUint64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiUint64x4 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mask := v_1
-               val := v_2
-               mem := v_3
-               if !(t.Size() == 32) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK64store256)
-               v.AddArg4(ptr, mask, val, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
-       // match: (StoreMasked64 {t} ptr mask val mem)
-       // cond: t.Size() == 64
-       // result: (VPMASK64store512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) val mem)
+}
+func rewriteValueAMD64_OpSetHiUint64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiUint64x8 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mask := v_1
-               val := v_2
-               mem := v_3
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK64store512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(ptr, v0, val, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpStoreMasked8(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiUint8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (StoreMasked8 {t} ptr mask val mem)
-       // cond: t.Size() == 64
-       // result: (VPMASK8store512 ptr (VPMOVVec8x64ToM <types.TypeMask> mask) val mem)
+       // match: (SetHiUint8x32 x y)
+       // result: (VINSERTI128256 [1] x y)
        for {
-               t := auxToType(v.Aux)
-               ptr := v_0
-               mask := v_1
-               val := v_2
-               mem := v_3
-               if !(t.Size() == 64) {
-                       break
-               }
-               v.reset(OpAMD64VPMASK8store512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg4(ptr, v0, val, mem)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
-       return false
 }
-func rewriteValueAMD64_OpSubMaskedFloat32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetHiUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedFloat32x16 x y mask)
-       // result: (VSUBPSMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (SetHiUint8x64 x y)
+       // result: (VINSERTI64X4512 [1] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSUBPSMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(1)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedFloat32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedFloat32x4 x y mask)
-       // result: (VSUBPSMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (SetLoFloat32x16 x y)
+       // result: (VINSERTF64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSUBPSMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTF64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedFloat32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoFloat32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedFloat32x8 x y mask)
-       // result: (VSUBPSMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (SetLoFloat32x8 x y)
+       // result: (VINSERTF128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSUBPSMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTF128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedFloat64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoFloat64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedFloat64x2 x y mask)
-       // result: (VSUBPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (SetLoFloat64x4 x y)
+       // result: (VINSERTF128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSUBPDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTF128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedFloat64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoFloat64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedFloat64x4 x y mask)
-       // result: (VSUBPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (SetLoFloat64x8 x y)
+       // result: (VINSERTF64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSUBPDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTF64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedFloat64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedFloat64x8 x y mask)
-       // result: (VSUBPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (SetLoInt16x16 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VSUBPDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt16x16 x y mask)
-       // result: (VPSUBWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (SetLoInt16x32 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt16x32 x y mask)
-       // result: (VPSUBWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (SetLoInt32x16 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt16x8 x y mask)
-       // result: (VPSUBWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (SetLoInt32x8 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt32x16 x y mask)
-       // result: (VPSUBDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (SetLoInt64x4 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt32x4 x y mask)
-       // result: (VPSUBDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       // match: (SetLoInt64x8 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt32x8 x y mask)
-       // result: (VPSUBDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       // match: (SetLoInt8x32 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoInt8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt64x2 x y mask)
-       // result: (VPSUBQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (SetLoInt8x64 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoUint16x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt64x4 x y mask)
-       // result: (VPSUBQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (SetLoUint16x16 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt64x8 x y mask)
-       // result: (VPSUBQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (SetLoUint16x32 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt8x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt8x16 x y mask)
-       // result: (VPSUBBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (SetLoUint32x16 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt8x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoUint32x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt8x32 x y mask)
-       // result: (VPSUBBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (SetLoUint32x8 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedInt8x64(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoUint64x4(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedInt8x64 x y mask)
-       // result: (VPSUBBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (SetLoUint64x4 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint16x16(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoUint64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedUint16x16 x y mask)
-       // result: (VPSUBWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (SetLoUint64x8 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint16x32(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoUint8x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedUint16x32 x y mask)
-       // result: (VPSUBWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (SetLoUint8x32 x y)
+       // result: (VINSERTI128256 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint16x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSetLoUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedUint16x8 x y mask)
-       // result: (VPSUBWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (SetLoUint8x64 x y)
+       // result: (VINSERTI64X4512 [0] x y)
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = uint8ToAuxInt(0)
+               v.AddArg2(x, y)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
+func rewriteValueAMD64_OpSlicemask(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubMaskedUint32x16 x y mask)
-       // result: (VPSUBDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
+       // match: (Slicemask <t> x)
+       // result: (SARQconst (NEGQ <t> x) [63])
        for {
+               t := v.Type
                x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64SARQconst)
+               v.AuxInt = int8ToAuxInt(63)
+               v0 := b.NewValue0(v.Pos, OpAMD64NEGQ, t)
+               v0.AddArg(x)
+               v.AddArg(v0)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSpectreIndex(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubMaskedUint32x4 x y mask)
-       // result: (VPSUBDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (SpectreIndex <t> x y)
+       // result: (CMOVQCC x (MOVQconst [0]) (CMPQ x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64CMOVQCC)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(0)
+               v1 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v1.AddArg2(x, y)
+               v.AddArg3(x, v0, v1)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
+func rewriteValueAMD64_OpSpectreSliceIndex(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubMaskedUint32x8 x y mask)
-       // result: (VPSUBDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
+       typ := &b.Func.Config.Types
+       // match: (SpectreSliceIndex <t> x y)
+       // result: (CMOVQHI x (MOVQconst [0]) (CMPQ x y))
        for {
                x := v_0
                y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.reset(OpAMD64CMOVQHI)
+               v0 := b.NewValue0(v.Pos, OpAMD64MOVQconst, typ.UInt64)
+               v0.AuxInt = int64ToAuxInt(0)
+               v1 := b.NewValue0(v.Pos, OpAMD64CMPQ, types.TypeFlags)
+               v1.AddArg2(x, y)
+               v.AddArg3(x, v0, v1)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint64x2(v *Value) bool {
+func rewriteValueAMD64_OpStore(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubMaskedUint64x2 x y mask)
-       // result: (VPSUBQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 8 && t.IsFloat()
+       // result: (MOVSDstore ptr val mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 8 && t.IsFloat()) {
+                       break
+               }
+               v.reset(OpAMD64MOVSDstore)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 4 && t.IsFloat()
+       // result: (MOVSSstore ptr val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 4 && t.IsFloat()) {
+                       break
+               }
+               v.reset(OpAMD64MOVSSstore)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 8 && !t.IsFloat()
+       // result: (MOVQstore ptr val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 8 && !t.IsFloat()) {
+                       break
+               }
+               v.reset(OpAMD64MOVQstore)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 4 && !t.IsFloat()
+       // result: (MOVLstore ptr val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 4 && !t.IsFloat()) {
+                       break
+               }
+               v.reset(OpAMD64MOVLstore)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 2
+       // result: (MOVWstore ptr val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 2) {
+                       break
+               }
+               v.reset(OpAMD64MOVWstore)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 1
+       // result: (MOVBstore ptr val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 1) {
+                       break
+               }
+               v.reset(OpAMD64MOVBstore)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 16
+       // result: (VMOVDQUstore128 ptr val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 16) {
+                       break
+               }
+               v.reset(OpAMD64VMOVDQUstore128)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 32
+       // result: (VMOVDQUstore256 ptr val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 32) {
+                       break
+               }
+               v.reset(OpAMD64VMOVDQUstore256)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
+       // match: (Store {t} ptr val mem)
+       // cond: t.Size() == 64
+       // result: (VMOVDQUstore512 ptr val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VMOVDQUstore512)
+               v.AddArg3(ptr, val, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSubMaskedUint64x4(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask16x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubMaskedUint64x4 x y mask)
-       // result: (VPSUBQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
+       // match: (StoreMask16x16 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec16x16ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint64x8(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask16x32(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubMaskedUint64x8 x y mask)
-       // result: (VPSUBQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
+       // match: (StoreMask16x32 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec16x32ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint8x16(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask16x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubMaskedUint8x16 x y mask)
-       // result: (VPSUBBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (StoreMask16x8 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec16x8ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint8x32(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask32x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubMaskedUint8x32 x y mask)
-       // result: (VPSUBBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (StoreMask32x16 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec32x16ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubMaskedUint8x64(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask32x4(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubMaskedUint8x64 x y mask)
-       // result: (VPSUBBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (StoreMask32x4 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec32x4ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedInt16x16(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask32x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedInt16x16 x y mask)
-       // result: (VPSUBSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (StoreMask32x8 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec32x8ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedInt16x32(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask64x2(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedInt16x32 x y mask)
-       // result: (VPSUBSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (StoreMask64x2 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec64x2ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBSWMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedInt16x8(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask64x4(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedInt16x8 x y mask)
-       // result: (VPSUBSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (StoreMask64x4 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec64x4ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedInt8x16(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask64x8(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedInt8x16 x y mask)
-       // result: (VPSUBSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (StoreMask64x8 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec64x8ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBSBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedInt8x32(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask8x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedInt8x32 x y mask)
-       // result: (VPSUBSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (StoreMask8x16 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec8x16ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBSBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedInt8x64(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask8x32(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedInt8x64 x y mask)
-       // result: (VPSUBSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (StoreMask8x32 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec8x32ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBSBMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedUint16x16(v *Value) bool {
+func rewriteValueAMD64_OpStoreMask8x64(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedUint16x16 x y mask)
-       // result: (VPSUBUSWMasked256 x y (VPMOVVec16x16ToM <types.TypeMask> mask))
+       // match: (StoreMask8x64 {t} ptr val mem)
+       // result: (KMOVQstore ptr (VPMOVVec8x64ToM <t> val) mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBUSWMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               val := v_1
+               mem := v_2
+               v.reset(OpAMD64KMOVQstore)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, t)
+               v0.AddArg(val)
+               v.AddArg3(ptr, v0, mem)
                return true
        }
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedUint16x32(v *Value) bool {
+func rewriteValueAMD64_OpStoreMasked16(v *Value) bool {
+       v_3 := v.Args[3]
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedUint16x32 x y mask)
-       // result: (VPSUBUSWMasked512 x y (VPMOVVec16x32ToM <types.TypeMask> mask))
+       // match: (StoreMasked16 {t} ptr mask val mem)
+       // cond: t.Size() == 64
+       // result: (VPMASK16store512 ptr (VPMOVVec16x32ToM <types.TypeMask> mask) val mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBUSWMasked512)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               mask := v_1
+               val := v_2
+               mem := v_3
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK16store512)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg4(ptr, v0, val, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedUint16x8(v *Value) bool {
+func rewriteValueAMD64_OpStoreMasked32(v *Value) bool {
+       v_3 := v.Args[3]
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedUint16x8 x y mask)
-       // result: (VPSUBUSWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))
+       // match: (StoreMasked32 {t} ptr mask val mem)
+       // cond: t.Size() == 16
+       // result: (VPMASK32store128 ptr mask val mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBUSWMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               mask := v_1
+               val := v_2
+               mem := v_3
+               if !(t.Size() == 16) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK32store128)
+               v.AddArg4(ptr, mask, val, mem)
                return true
        }
-}
-func rewriteValueAMD64_OpSubSaturatedMaskedUint8x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (SubSaturatedMaskedUint8x16 x y mask)
-       // result: (VPSUBUSBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
+       // match: (StoreMasked32 {t} ptr mask val mem)
+       // cond: t.Size() == 32
+       // result: (VPMASK32store256 ptr mask val mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBUSBMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, types.TypeMask)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               mask := v_1
+               val := v_2
+               mem := v_3
+               if !(t.Size() == 32) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK32store256)
+               v.AddArg4(ptr, mask, val, mem)
+               return true
+       }
+       // match: (StoreMasked32 {t} ptr mask val mem)
+       // cond: t.Size() == 64
+       // result: (VPMASK32store512 ptr (VPMOVVec32x16ToM <types.TypeMask> mask) val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               mask := v_1
+               val := v_2
+               mem := v_3
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK32store512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg4(ptr, v0, val, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedUint8x32(v *Value) bool {
+func rewriteValueAMD64_OpStoreMasked64(v *Value) bool {
+       v_3 := v.Args[3]
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedUint8x32 x y mask)
-       // result: (VPSUBUSBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
+       // match: (StoreMasked64 {t} ptr mask val mem)
+       // cond: t.Size() == 16
+       // result: (VPMASK64store128 ptr mask val mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBUSBMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, types.TypeMask)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               mask := v_1
+               val := v_2
+               mem := v_3
+               if !(t.Size() == 16) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK64store128)
+               v.AddArg4(ptr, mask, val, mem)
+               return true
+       }
+       // match: (StoreMasked64 {t} ptr mask val mem)
+       // cond: t.Size() == 32
+       // result: (VPMASK64store256 ptr mask val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               mask := v_1
+               val := v_2
+               mem := v_3
+               if !(t.Size() == 32) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK64store256)
+               v.AddArg4(ptr, mask, val, mem)
+               return true
+       }
+       // match: (StoreMasked64 {t} ptr mask val mem)
+       // cond: t.Size() == 64
+       // result: (VPMASK64store512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) val mem)
+       for {
+               t := auxToType(v.Aux)
+               ptr := v_0
+               mask := v_1
+               val := v_2
+               mem := v_3
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK64store512)
+               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg4(ptr, v0, val, mem)
                return true
        }
+       return false
 }
-func rewriteValueAMD64_OpSubSaturatedMaskedUint8x64(v *Value) bool {
+func rewriteValueAMD64_OpStoreMasked8(v *Value) bool {
+       v_3 := v.Args[3]
        v_2 := v.Args[2]
        v_1 := v.Args[1]
        v_0 := v.Args[0]
        b := v.Block
-       // match: (SubSaturatedMaskedUint8x64 x y mask)
-       // result: (VPSUBUSBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+       // match: (StoreMasked8 {t} ptr mask val mem)
+       // cond: t.Size() == 64
+       // result: (VPMASK8store512 ptr (VPMOVVec8x64ToM <types.TypeMask> mask) val mem)
        for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPSUBUSBMasked512)
+               t := auxToType(v.Aux)
+               ptr := v_0
+               mask := v_1
+               val := v_2
+               mem := v_3
+               if !(t.Size() == 64) {
+                       break
+               }
+               v.reset(OpAMD64VPMASK8store512)
                v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, types.TypeMask)
                v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
+               v.AddArg4(ptr, v0, val, mem)
                return true
        }
+       return false
 }
 func rewriteValueAMD64_OpTrunc(v *Value) bool {
        v_0 := v.Args[0]
@@ -56567,114 +38729,6 @@ func rewriteValueAMD64_OpTruncScaledFloat64x8(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpTruncScaledMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledMaskedFloat32x16 [a] x mask)
-       // result: (VRNDSCALEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledMaskedFloat32x4 [a] x mask)
-       // result: (VRNDSCALEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledMaskedFloat32x8 [a] x mask)
-       // result: (VRNDSCALEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPSMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledMaskedFloat64x2 [a] x mask)
-       // result: (VRNDSCALEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledMaskedFloat64x4 [a] x mask)
-       // result: (VRNDSCALEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledMaskedFloat64x8 [a] x mask)
-       // result: (VRNDSCALEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VRNDSCALEPDMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
 func rewriteValueAMD64_OpTruncScaledResidueFloat32x16(v *Value) bool {
        v_0 := v.Args[0]
        // match: (TruncScaledResidueFloat32x16 [a] x)
@@ -56753,330 +38807,6 @@ func rewriteValueAMD64_OpTruncScaledResidueFloat64x8(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledResidueMaskedFloat32x16 [a] x mask)
-       // result: (VREDUCEPSMasked512 [a+3] x (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledResidueMaskedFloat32x4 [a] x mask)
-       // result: (VREDUCEPSMasked128 [a+3] x (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledResidueMaskedFloat32x8 [a] x mask)
-       // result: (VREDUCEPSMasked256 [a+3] x (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPSMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledResidueMaskedFloat64x2 [a] x mask)
-       // result: (VREDUCEPDMasked128 [a+3] x (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked128)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledResidueMaskedFloat64x4 [a] x mask)
-       // result: (VREDUCEPDMasked256 [a+3] x (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked256)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpTruncScaledResidueMaskedFloat64x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (TruncScaledResidueMaskedFloat64x8 [a] x mask)
-       // result: (VREDUCEPDMasked512 [a+3] x (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               a := auxIntToUint8(v.AuxInt)
-               x := v_0
-               mask := v_1
-               v.reset(OpAMD64VREDUCEPDMasked512)
-               v.AuxInt = uint8ToAuxInt(a + 3)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg2(x, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedInt32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedInt32x16 x y mask)
-       // result: (VPXORDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedInt32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedInt32x4 x y mask)
-       // result: (VPXORDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedInt32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedInt32x8 x y mask)
-       // result: (VPXORDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedInt64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedInt64x2 x y mask)
-       // result: (VPXORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedInt64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedInt64x4 x y mask)
-       // result: (VPXORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedInt64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedInt64x8 x y mask)
-       // result: (VPXORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedUint32x16(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedUint32x16 x y mask)
-       // result: (VPXORDMasked512 x y (VPMOVVec32x16ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORDMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedUint32x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedUint32x4 x y mask)
-       // result: (VPXORDMasked128 x y (VPMOVVec32x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORDMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedUint32x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedUint32x8 x y mask)
-       // result: (VPXORDMasked256 x y (VPMOVVec32x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORDMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedUint64x2(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedUint64x2 x y mask)
-       // result: (VPXORQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORQMasked128)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedUint64x4(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedUint64x4 x y mask)
-       // result: (VPXORQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORQMasked256)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpXorMaskedUint64x8(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (XorMaskedUint64x8 x y mask)
-       // result: (VPXORQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-       for {
-               x := v_0
-               y := v_1
-               mask := v_2
-               v.reset(OpAMD64VPXORQMasked512)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-               v0.AddArg(mask)
-               v.AddArg3(x, y, v0)
-               return true
-       }
-}
 func rewriteValueAMD64_OpZero(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
index 90149300b2ccdab29dacf34d3f7cc7e6e7c9c4ed..e6c6874bddc3b3156cb476db63f18a06afe64717 100644 (file)
@@ -24,18 +24,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Int64x2.Abs", opLen1(ssa.OpAbsInt64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int64x4.Abs", opLen1(ssa.OpAbsInt64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int64x8.Abs", opLen1(ssa.OpAbsInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.AbsMasked", opLen2(ssa.OpAbsMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.AbsMasked", opLen2(ssa.OpAbsMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.AbsMasked", opLen2(ssa.OpAbsMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.AbsMasked", opLen2(ssa.OpAbsMaskedInt64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Add", opLen2(ssa.OpAddFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Add", opLen2(ssa.OpAddFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Add", opLen2(ssa.OpAddFloat32x16, types.TypeVec512), sys.AMD64)
@@ -69,51 +57,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Int32x4.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int32x8.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int32x16.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.AddDotProdPairsSaturatedMasked", opLen4(ssa.OpAddDotProdPairsSaturatedMaskedInt32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.AddDotProdQuadrupleMasked", opLen4_31(ssa.OpAddDotProdQuadrupleMaskedInt32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.AddDotProdQuadrupleSaturatedMasked", opLen4_31(ssa.OpAddDotProdQuadrupleSaturatedMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.AddMasked", opLen3(ssa.OpAddMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.AddMasked", opLen3(ssa.OpAddMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.AddMasked", opLen3(ssa.OpAddMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.AddMasked", opLen3(ssa.OpAddMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.AddMasked", opLen3(ssa.OpAddMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.AddMasked", opLen3(ssa.OpAddMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.AddMasked", opLen3(ssa.OpAddMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.AddMasked", opLen3(ssa.OpAddMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.AddMasked", opLen3(ssa.OpAddMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.AddMasked", opLen3(ssa.OpAddMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.AddMasked", opLen3(ssa.OpAddMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.AddMasked", opLen3(ssa.OpAddMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.AddMasked", opLen3(ssa.OpAddMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.AddMasked", opLen3(ssa.OpAddMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.AddMasked", opLen3(ssa.OpAddMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.AddMasked", opLen3(ssa.OpAddMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.AddMasked", opLen3(ssa.OpAddMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.AddMasked", opLen3(ssa.OpAddMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.AddMasked", opLen3(ssa.OpAddMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.AddMasked", opLen3(ssa.OpAddMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.AddMasked", opLen3(ssa.OpAddMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.AddMasked", opLen3(ssa.OpAddMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.AddMasked", opLen3(ssa.OpAddMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.AddMasked", opLen3(ssa.OpAddMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.AddMasked", opLen3(ssa.OpAddMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.AddMasked", opLen3(ssa.OpAddMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.AddMasked", opLen3(ssa.OpAddMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.AddMasked", opLen3(ssa.OpAddMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.AddMasked", opLen3(ssa.OpAddMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.AddMasked", opLen3(ssa.OpAddMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.AddPairs", opLen2(ssa.OpAddPairsFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.AddPairs", opLen2(ssa.OpAddPairsFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.AddPairs", opLen2(ssa.OpAddPairsFloat64x2, types.TypeVec128), sys.AMD64)
@@ -140,18 +89,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x8.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint16x16.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint16x32.AddSaturated", opLen2(ssa.OpAddSaturatedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.AddSaturatedMasked", opLen3(ssa.OpAddSaturatedMaskedUint16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.AddSub", opLen2(ssa.OpAddSubFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.AddSub", opLen2(ssa.OpAddSubFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.AddSub", opLen2(ssa.OpAddSubFloat64x2, types.TypeVec128), sys.AMD64)
@@ -180,18 +117,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.And", opLen2(ssa.OpAndUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.And", opLen2(ssa.OpAndUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.And", opLen2(ssa.OpAndUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.AndMasked", opLen3(ssa.OpAndMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.AndMasked", opLen3(ssa.OpAndMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.AndMasked", opLen3(ssa.OpAndMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.AndMasked", opLen3(ssa.OpAndMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.AndMasked", opLen3(ssa.OpAndMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.AndMasked", opLen3(ssa.OpAndMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.AndMasked", opLen3(ssa.OpAndMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.AndMasked", opLen3(ssa.OpAndMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.AndMasked", opLen3(ssa.OpAndMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.AndMasked", opLen3(ssa.OpAndMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.AndMasked", opLen3(ssa.OpAndMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.AndMasked", opLen3(ssa.OpAndMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.AndNot", opLen2_21(ssa.OpAndNotInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.AndNot", opLen2_21(ssa.OpAndNotInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.AndNot", opLen2_21(ssa.OpAndNotInt8x64, types.TypeVec512), sys.AMD64)
@@ -216,30 +141,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.AndNot", opLen2_21(ssa.OpAndNotUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.AndNot", opLen2_21(ssa.OpAndNotUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.AndNot", opLen2_21(ssa.OpAndNotUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.AndNotMasked", opLen3_21(ssa.OpAndNotMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint8x16.Average", opLen2(ssa.OpAverageUint8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint8x32.Average", opLen2(ssa.OpAverageUint8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint8x64.Average", opLen2(ssa.OpAverageUint8x64, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint16x8.Average", opLen2(ssa.OpAverageUint16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint16x16.Average", opLen2(ssa.OpAverageUint16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint16x32.Average", opLen2(ssa.OpAverageUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.AverageMasked", opLen3(ssa.OpAverageMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.AverageMasked", opLen3(ssa.OpAverageMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.AverageMasked", opLen3(ssa.OpAverageMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.AverageMasked", opLen3(ssa.OpAverageMaskedUint16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Broadcast128", opLen1(ssa.OpBroadcast128Float32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x2.Broadcast128", opLen1(ssa.OpBroadcast128Float64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x16.Broadcast128", opLen1(ssa.OpBroadcast128Int8x16, types.TypeVec128), sys.AMD64)
@@ -250,16 +157,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x8.Broadcast128", opLen1(ssa.OpBroadcast128Uint16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint32x4.Broadcast128", opLen1(ssa.OpBroadcast128Uint32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x2.Broadcast128", opLen1(ssa.OpBroadcast128Uint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Broadcast128Masked", opLen2(ssa.OpBroadcast128MaskedUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x4.Broadcast256", opLen1(ssa.OpBroadcast256Float32x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.Broadcast256", opLen1(ssa.OpBroadcast256Float64x2, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x16.Broadcast256", opLen1(ssa.OpBroadcast256Int8x16, types.TypeVec256), sys.AMD64)
@@ -270,16 +167,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x8.Broadcast256", opLen1(ssa.OpBroadcast256Uint16x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint32x4.Broadcast256", opLen1(ssa.OpBroadcast256Uint32x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x2.Broadcast256", opLen1(ssa.OpBroadcast256Uint64x2, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedFloat32x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedFloat64x2, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x16.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt8x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x8.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt16x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt32x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedInt64x2, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint8x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint16x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint32x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Broadcast256Masked", opLen2(ssa.OpBroadcast256MaskedUint64x2, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x4.Broadcast512", opLen1(ssa.OpBroadcast512Float32x4, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.Broadcast512", opLen1(ssa.OpBroadcast512Float64x2, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.Broadcast512", opLen1(ssa.OpBroadcast512Int8x16, types.TypeVec512), sys.AMD64)
@@ -290,16 +177,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x8.Broadcast512", opLen1(ssa.OpBroadcast512Uint16x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint32x4.Broadcast512", opLen1(ssa.OpBroadcast512Uint32x4, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint64x2.Broadcast512", opLen1(ssa.OpBroadcast512Uint64x2, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedFloat32x4, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedFloat64x2, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt8x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt16x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt32x4, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedInt64x2, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint8x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint16x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint32x4, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Broadcast512Masked", opLen2(ssa.OpBroadcast512MaskedUint64x2, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64)
@@ -310,24 +187,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.CeilScaled", opLen1Imm8(ssa.OpCeilScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.CeilScaledMasked", opLen2Imm8(ssa.OpCeilScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float32x4.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float32x8.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float32x16.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float64x2.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.CeilScaledResidue", opLen1Imm8(ssa.OpCeilScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.CeilScaledResidueMasked", opLen2Imm8(ssa.OpCeilScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float32x4.Compress", opLen2(ssa.OpCompressFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Compress", opLen2(ssa.OpCompressFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Compress", opLen2(ssa.OpCompressFloat32x16, types.TypeVec512), sys.AMD64)
@@ -361,15 +226,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float32x4.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.ConvertToInt32", opLen1(ssa.OpConvertToInt32Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.ConvertToInt32Masked", opLen2(ssa.OpConvertToInt32MaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.ConvertToInt32Masked", opLen2(ssa.OpConvertToInt32MaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.ConvertToInt32Masked", opLen2(ssa.OpConvertToInt32MaskedFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.ConvertToUint32", opLen1(ssa.OpConvertToUint32Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.ConvertToUint32Masked", opLen2(ssa.OpConvertToUint32MaskedFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.CopySign", opLen2(ssa.OpCopySignInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.CopySign", opLen2(ssa.OpCopySignInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int16x8.CopySign", opLen2(ssa.OpCopySignInt16x8, types.TypeVec128), sys.AMD64)
@@ -382,24 +241,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.Div", opLen2(ssa.OpDivFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.Div", opLen2(ssa.OpDivFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.Div", opLen2(ssa.OpDivFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.DivMasked", opLen3(ssa.OpDivMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.DivMasked", opLen3(ssa.OpDivMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.DivMasked", opLen3(ssa.OpDivMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.DivMasked", opLen3(ssa.OpDivMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.DivMasked", opLen3(ssa.OpDivMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.DivMasked", opLen3(ssa.OpDivMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int16x8.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int16x16.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int16x32.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.DotProdPairsMasked", opLen3(ssa.OpDotProdPairsMaskedInt16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint8x16.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint8x32.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint8x64.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.DotProdPairsSaturatedMasked", opLen3(ssa.OpDotProdPairsSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64)
@@ -430,36 +277,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.Equal", opLen2(ssa.OpEqualFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.Equal", opLen2(ssa.OpEqualFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.Equal", opLen2(ssa.OpEqualFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.EqualMasked", opLen3(ssa.OpEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.EqualMasked", opLen3(ssa.OpEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.EqualMasked", opLen3(ssa.OpEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.EqualMasked", opLen3(ssa.OpEqualMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.EqualMasked", opLen3(ssa.OpEqualMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.EqualMasked", opLen3(ssa.OpEqualMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.EqualMasked", opLen3(ssa.OpEqualMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.EqualMasked", opLen3(ssa.OpEqualMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.EqualMasked", opLen3(ssa.OpEqualMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.EqualMasked", opLen3(ssa.OpEqualMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.EqualMasked", opLen3(ssa.OpEqualMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.EqualMasked", opLen3(ssa.OpEqualMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.EqualMasked", opLen3(ssa.OpEqualMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.EqualMasked", opLen3(ssa.OpEqualMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.EqualMasked", opLen3(ssa.OpEqualMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.EqualMasked", opLen3(ssa.OpEqualMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.EqualMasked", opLen3(ssa.OpEqualMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.EqualMasked", opLen3(ssa.OpEqualMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.EqualMasked", opLen3(ssa.OpEqualMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.EqualMasked", opLen3(ssa.OpEqualMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.EqualMasked", opLen3(ssa.OpEqualMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.EqualMasked", opLen3(ssa.OpEqualMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.EqualMasked", opLen3(ssa.OpEqualMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Expand", opLen2(ssa.OpExpandFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Expand", opLen2(ssa.OpExpandFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Expand", opLen2(ssa.OpExpandFloat32x16, types.TypeVec512), sys.AMD64)
@@ -500,42 +317,21 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.FloorScaled", opLen1Imm8(ssa.OpFloorScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.FloorScaledMasked", opLen2Imm8(ssa.OpFloorScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float32x4.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float32x8.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float32x16.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float64x2.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.FloorScaledResidue", opLen1Imm8(ssa.OpFloorScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.FloorScaledResidueMasked", opLen2Imm8(ssa.OpFloorScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Uint8x16.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x16, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Uint8x32.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x32, types.TypeVec256, 0), sys.AMD64)
        addF(simdPackage, "Uint8x64.GaloisFieldAffineTransform", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformUint8x64, types.TypeVec512, 0), sys.AMD64)
        addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformInverse", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseUint8x16, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformInverse", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseUint8x32, types.TypeVec256, 0), sys.AMD64)
        addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformInverse", opLen2Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseUint8x64, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformInverseMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseMaskedUint8x16, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformInverseMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseMaskedUint8x32, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformInverseMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformInverseMaskedUint8x64, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint8x16.GaloisFieldAffineTransformMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformMaskedUint8x16, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint8x32.GaloisFieldAffineTransformMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformMaskedUint8x32, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint8x64.GaloisFieldAffineTransformMasked", opLen3Imm8_2I(ssa.OpGaloisFieldAffineTransformMaskedUint8x64, types.TypeVec512, 0), sys.AMD64)
        addF(simdPackage, "Uint8x16.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint8x32.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint8x64.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x64, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.GetElem", opLen1Imm8(ssa.OpGetElemFloat32x4, types.Types[types.TFLOAT32], 0), sys.AMD64)
        addF(simdPackage, "Float64x2.GetElem", opLen1Imm8(ssa.OpGetElemFloat64x2, types.Types[types.TFLOAT64], 0), sys.AMD64)
        addF(simdPackage, "Int8x16.GetElem", opLen1Imm8(ssa.OpGetElemInt8x16, types.Types[types.TINT8], 0), sys.AMD64)
@@ -622,78 +418,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.GreaterMasked", opLen3(ssa.OpGreaterMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.IsNan", opLen2(ssa.OpIsNanFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.IsNan", opLen2(ssa.OpIsNanFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.IsNan", opLen2(ssa.OpIsNanFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.IsNan", opLen2(ssa.OpIsNanFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.IsNan", opLen2(ssa.OpIsNanFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.IsNan", opLen2(ssa.OpIsNanFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.IsNanMasked", opLen3(ssa.OpIsNanMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Less", opLen2(ssa.OpLessFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Less", opLen2(ssa.OpLessFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Less", opLen2(ssa.OpLessFloat32x16, types.TypeVec512), sys.AMD64)
@@ -722,66 +452,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x32.LessEqual", opLen2(ssa.OpLessEqualUint16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint32x16.LessEqual", opLen2(ssa.OpLessEqualUint32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint64x8.LessEqual", opLen2(ssa.OpLessEqualUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.LessMasked", opLen3(ssa.OpLessMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.LessMasked", opLen3(ssa.OpLessMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.LessMasked", opLen3(ssa.OpLessMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.LessMasked", opLen3(ssa.OpLessMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.LessMasked", opLen3(ssa.OpLessMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.LessMasked", opLen3(ssa.OpLessMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.LessMasked", opLen3(ssa.OpLessMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.LessMasked", opLen3(ssa.OpLessMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.LessMasked", opLen3(ssa.OpLessMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.LessMasked", opLen3(ssa.OpLessMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.LessMasked", opLen3(ssa.OpLessMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.LessMasked", opLen3(ssa.OpLessMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.LessMasked", opLen3(ssa.OpLessMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.LessMasked", opLen3(ssa.OpLessMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.LessMasked", opLen3(ssa.OpLessMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.LessMasked", opLen3(ssa.OpLessMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.LessMasked", opLen3(ssa.OpLessMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.LessMasked", opLen3(ssa.OpLessMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.LessMasked", opLen3(ssa.OpLessMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.LessMasked", opLen3(ssa.OpLessMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.LessMasked", opLen3(ssa.OpLessMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.LessMasked", opLen3(ssa.OpLessMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.LessMasked", opLen3(ssa.OpLessMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.LessMasked", opLen3(ssa.OpLessMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.LessMasked", opLen3(ssa.OpLessMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.LessMasked", opLen3(ssa.OpLessMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.LessMasked", opLen3(ssa.OpLessMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.LessMasked", opLen3(ssa.OpLessMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.LessMasked", opLen3(ssa.OpLessMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.LessMasked", opLen3(ssa.OpLessMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Max", opLen2(ssa.OpMaxFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Max", opLen2(ssa.OpMaxFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Max", opLen2(ssa.OpMaxFloat32x16, types.TypeVec512), sys.AMD64)
@@ -812,36 +482,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.Max", opLen2(ssa.OpMaxUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.Max", opLen2(ssa.OpMaxUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.Max", opLen2(ssa.OpMaxUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaxMasked", opLen3(ssa.OpMaxMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaxMasked", opLen3(ssa.OpMaxMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaxMasked", opLen3(ssa.OpMaxMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaxMasked", opLen3(ssa.OpMaxMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaxMasked", opLen3(ssa.OpMaxMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaxMasked", opLen3(ssa.OpMaxMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.MaxMasked", opLen3(ssa.OpMaxMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.MaxMasked", opLen3(ssa.OpMaxMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.MaxMasked", opLen3(ssa.OpMaxMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaxMasked", opLen3(ssa.OpMaxMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaxMasked", opLen3(ssa.OpMaxMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaxMasked", opLen3(ssa.OpMaxMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaxMasked", opLen3(ssa.OpMaxMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaxMasked", opLen3(ssa.OpMaxMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaxMasked", opLen3(ssa.OpMaxMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaxMasked", opLen3(ssa.OpMaxMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaxMasked", opLen3(ssa.OpMaxMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaxMasked", opLen3(ssa.OpMaxMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MaxMasked", opLen3(ssa.OpMaxMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MaxMasked", opLen3(ssa.OpMaxMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MaxMasked", opLen3(ssa.OpMaxMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaxMasked", opLen3(ssa.OpMaxMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MaxMasked", opLen3(ssa.OpMaxMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MaxMasked", opLen3(ssa.OpMaxMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MaxMasked", opLen3(ssa.OpMaxMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MaxMasked", opLen3(ssa.OpMaxMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaxMasked", opLen3(ssa.OpMaxMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MaxMasked", opLen3(ssa.OpMaxMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MaxMasked", opLen3(ssa.OpMaxMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MaxMasked", opLen3(ssa.OpMaxMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Min", opLen2(ssa.OpMinFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Min", opLen2(ssa.OpMinFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Min", opLen2(ssa.OpMinFloat32x16, types.TypeVec512), sys.AMD64)
@@ -872,36 +512,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.Min", opLen2(ssa.OpMinUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.Min", opLen2(ssa.OpMinUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.Min", opLen2(ssa.OpMinUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.MinMasked", opLen3(ssa.OpMinMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.MinMasked", opLen3(ssa.OpMinMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.MinMasked", opLen3(ssa.OpMinMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.MinMasked", opLen3(ssa.OpMinMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.MinMasked", opLen3(ssa.OpMinMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.MinMasked", opLen3(ssa.OpMinMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.MinMasked", opLen3(ssa.OpMinMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.MinMasked", opLen3(ssa.OpMinMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.MinMasked", opLen3(ssa.OpMinMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.MinMasked", opLen3(ssa.OpMinMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.MinMasked", opLen3(ssa.OpMinMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.MinMasked", opLen3(ssa.OpMinMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.MinMasked", opLen3(ssa.OpMinMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.MinMasked", opLen3(ssa.OpMinMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.MinMasked", opLen3(ssa.OpMinMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.MinMasked", opLen3(ssa.OpMinMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.MinMasked", opLen3(ssa.OpMinMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.MinMasked", opLen3(ssa.OpMinMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MinMasked", opLen3(ssa.OpMinMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MinMasked", opLen3(ssa.OpMinMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MinMasked", opLen3(ssa.OpMinMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MinMasked", opLen3(ssa.OpMinMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MinMasked", opLen3(ssa.OpMinMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MinMasked", opLen3(ssa.OpMinMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MinMasked", opLen3(ssa.OpMinMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MinMasked", opLen3(ssa.OpMinMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MinMasked", opLen3(ssa.OpMinMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MinMasked", opLen3(ssa.OpMinMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MinMasked", opLen3(ssa.OpMinMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MinMasked", opLen3(ssa.OpMinMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Mul", opLen2(ssa.OpMulFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Mul", opLen2(ssa.OpMulFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Mul", opLen2(ssa.OpMulFloat32x16, types.TypeVec512), sys.AMD64)
@@ -932,24 +542,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.MulAdd", opLen3(ssa.OpMulAddFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.MulAdd", opLen3(ssa.OpMulAddFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MulAdd", opLen3(ssa.OpMulAddFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.MulAddMasked", opLen4(ssa.OpMulAddMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.MulAddSub", opLen3(ssa.OpMulAddSubFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MulAddSub", opLen3(ssa.OpMulAddSubFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.MulAddSubMasked", opLen4(ssa.OpMulAddSubMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int32x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint32x4, types.TypeVec128), sys.AMD64)
@@ -960,48 +558,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x8.MulHigh", opLen2(ssa.OpMulHighUint16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint16x16.MulHigh", opLen2(ssa.OpMulHighUint16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint16x32.MulHigh", opLen2(ssa.OpMulHighUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.MulHighMasked", opLen3(ssa.OpMulHighMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MulHighMasked", opLen3(ssa.OpMulHighMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.MulMasked", opLen3(ssa.OpMulMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.MulMasked", opLen3(ssa.OpMulMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.MulMasked", opLen3(ssa.OpMulMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.MulMasked", opLen3(ssa.OpMulMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.MulMasked", opLen3(ssa.OpMulMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.MulMasked", opLen3(ssa.OpMulMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.MulMasked", opLen3(ssa.OpMulMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.MulMasked", opLen3(ssa.OpMulMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.MulMasked", opLen3(ssa.OpMulMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.MulMasked", opLen3(ssa.OpMulMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.MulMasked", opLen3(ssa.OpMulMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.MulMasked", opLen3(ssa.OpMulMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.MulMasked", opLen3(ssa.OpMulMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.MulMasked", opLen3(ssa.OpMulMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.MulMasked", opLen3(ssa.OpMulMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MulMasked", opLen3(ssa.OpMulMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MulMasked", opLen3(ssa.OpMulMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MulMasked", opLen3(ssa.OpMulMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MulMasked", opLen3(ssa.OpMulMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MulMasked", opLen3(ssa.OpMulMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MulMasked", opLen3(ssa.OpMulMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MulMasked", opLen3(ssa.OpMulMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MulMasked", opLen3(ssa.OpMulMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MulMasked", opLen3(ssa.OpMulMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.MulSubAdd", opLen3(ssa.OpMulSubAddFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MulSubAdd", opLen3(ssa.OpMulSubAddFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.MulSubAddMasked", opLen4(ssa.OpMulSubAddMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.NotEqual", opLen2(ssa.OpNotEqualFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.NotEqual", opLen2(ssa.OpNotEqualFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.NotEqual", opLen2(ssa.OpNotEqualFloat32x16, types.TypeVec512), sys.AMD64)
@@ -1016,36 +578,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x32.NotEqual", opLen2(ssa.OpNotEqualUint16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint32x16.NotEqual", opLen2(ssa.OpNotEqualUint32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint64x8.NotEqual", opLen2(ssa.OpNotEqualUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.OnesCount", opLen1(ssa.OpOnesCountInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.OnesCount", opLen1(ssa.OpOnesCountInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.OnesCount", opLen1(ssa.OpOnesCountInt8x64, types.TypeVec512), sys.AMD64)
@@ -1070,30 +602,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.OnesCount", opLen1(ssa.OpOnesCountUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.OnesCount", opLen1(ssa.OpOnesCountUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.OnesCount", opLen1(ssa.OpOnesCountUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.OnesCountMasked", opLen2(ssa.OpOnesCountMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.Or", opLen2(ssa.OpOrInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.Or", opLen2(ssa.OpOrInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.Or", opLen2(ssa.OpOrInt8x64, types.TypeVec512), sys.AMD64)
@@ -1118,18 +626,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.Or", opLen2(ssa.OpOrUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.Or", opLen2(ssa.OpOrUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.Or", opLen2(ssa.OpOrUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.OrMasked", opLen3(ssa.OpOrMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.OrMasked", opLen3(ssa.OpOrMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.OrMasked", opLen3(ssa.OpOrMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.OrMasked", opLen3(ssa.OpOrMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.OrMasked", opLen3(ssa.OpOrMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.OrMasked", opLen3(ssa.OpOrMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.OrMasked", opLen3(ssa.OpOrMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.OrMasked", opLen3(ssa.OpOrMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.OrMasked", opLen3(ssa.OpOrMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.OrMasked", opLen3(ssa.OpOrMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.OrMasked", opLen3(ssa.OpOrMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.OrMasked", opLen3(ssa.OpOrMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.Permute", opLen2_21(ssa.OpPermuteInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint8x16.Permute", opLen2_21(ssa.OpPermuteUint8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.Permute", opLen2_21(ssa.OpPermuteInt8x32, types.TypeVec256), sys.AMD64)
@@ -1184,84 +680,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x8.Permute2", opLen3_231(ssa.OpPermute2Float64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int64x8.Permute2", opLen3_231(ssa.OpPermute2Int64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint64x8.Permute2", opLen3_231(ssa.OpPermute2Uint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.Permute2Masked", opLen4_231(ssa.OpPermute2MaskedUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x4.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.PermuteMasked", opLen3_21(ssa.OpPermuteMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Reciprocal", opLen1(ssa.OpReciprocalFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Reciprocal", opLen1(ssa.OpReciprocalFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Reciprocal", opLen1(ssa.OpReciprocalFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.Reciprocal", opLen1(ssa.OpReciprocalFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.Reciprocal", opLen1(ssa.OpReciprocalFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.Reciprocal", opLen1(ssa.OpReciprocalFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.ReciprocalMasked", opLen2(ssa.OpReciprocalMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.ReciprocalSqrt", opLen1(ssa.OpReciprocalSqrtFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.ReciprocalSqrtMasked", opLen2(ssa.OpReciprocalSqrtMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int32x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x4, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Int32x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x8, types.TypeVec256, 0), sys.AMD64)
        addF(simdPackage, "Int32x16.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftInt32x16, types.TypeVec512, 0), sys.AMD64)
@@ -1274,18 +704,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x2, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Uint64x4.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x4, types.TypeVec256, 0), sys.AMD64)
        addF(simdPackage, "Uint64x8.RotateAllLeft", opLen1Imm8(ssa.OpRotateAllLeftUint64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int32x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt32x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int32x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int32x16.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt32x16, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int64x2.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt64x2, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int64x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int64x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedInt64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint32x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x16.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint32x16, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x2.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint64x2, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x4.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x8.RotateAllLeftMasked", opLen2Imm8(ssa.OpRotateAllLeftMaskedUint64x8, types.TypeVec512, 0), sys.AMD64)
        addF(simdPackage, "Int32x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x4, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Int32x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x8, types.TypeVec256, 0), sys.AMD64)
        addF(simdPackage, "Int32x16.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightInt32x16, types.TypeVec512, 0), sys.AMD64)
@@ -1298,18 +716,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x2, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Uint64x4.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x4, types.TypeVec256, 0), sys.AMD64)
        addF(simdPackage, "Uint64x8.RotateAllRight", opLen1Imm8(ssa.OpRotateAllRightUint64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int32x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt32x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int32x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int32x16.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt32x16, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int64x2.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt64x2, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int64x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int64x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedInt64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint32x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x16.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint32x16, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x2.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint64x2, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x4.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x8.RotateAllRightMasked", opLen2Imm8(ssa.OpRotateAllRightMaskedUint64x8, types.TypeVec512, 0), sys.AMD64)
        addF(simdPackage, "Int32x4.RotateLeft", opLen2(ssa.OpRotateLeftInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int32x8.RotateLeft", opLen2(ssa.OpRotateLeftInt32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int32x16.RotateLeft", opLen2(ssa.OpRotateLeftInt32x16, types.TypeVec512), sys.AMD64)
@@ -1322,18 +728,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.RotateLeft", opLen2(ssa.OpRotateLeftUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.RotateLeft", opLen2(ssa.OpRotateLeftUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.RotateLeft", opLen2(ssa.OpRotateLeftUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.RotateLeftMasked", opLen3(ssa.OpRotateLeftMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int32x4.RotateRight", opLen2(ssa.OpRotateRightInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int32x8.RotateRight", opLen2(ssa.OpRotateRightInt32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int32x16.RotateRight", opLen2(ssa.OpRotateRightInt32x16, types.TypeVec512), sys.AMD64)
@@ -1346,18 +740,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.RotateRight", opLen2(ssa.OpRotateRightUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.RotateRight", opLen2(ssa.OpRotateRightUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.RotateRight", opLen2(ssa.OpRotateRightUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.RotateRightMasked", opLen3(ssa.OpRotateRightMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.RoundToEven", opLen1(ssa.OpRoundToEvenFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.RoundToEven", opLen1(ssa.OpRoundToEvenFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.RoundToEven", opLen1(ssa.OpRoundToEvenFloat64x2, types.TypeVec128), sys.AMD64)
@@ -1368,36 +750,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.RoundToEvenScaled", opLen1Imm8(ssa.OpRoundToEvenScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.RoundToEvenScaledMasked", opLen2Imm8(ssa.OpRoundToEvenScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float32x4.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float32x8.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float32x16.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float64x2.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.RoundToEvenScaledResidue", opLen1Imm8(ssa.OpRoundToEvenScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.RoundToEvenScaledResidueMasked", opLen2Imm8(ssa.OpRoundToEvenScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float32x4.Scale", opLen2(ssa.OpScaleFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Scale", opLen2(ssa.OpScaleFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Scale", opLen2(ssa.OpScaleFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.Scale", opLen2(ssa.OpScaleFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.Scale", opLen2(ssa.OpScaleFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.Scale", opLen2(ssa.OpScaleFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.SetElem", opLen2Imm8(ssa.OpSetElemFloat32x4, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Float64x2.SetElem", opLen2Imm8(ssa.OpSetElemFloat64x2, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Int8x16.SetElem", opLen2Imm8(ssa.OpSetElemInt8x16, types.TypeVec128, 0), sys.AMD64)
@@ -1484,42 +848,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint64x2, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Uint64x4.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint64x4, types.TypeVec256, 0), sys.AMD64)
        addF(simdPackage, "Uint64x8.ShiftAllLeftConcat", opLen2Imm8(ssa.OpShiftAllLeftConcatUint64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int16x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt16x8, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int16x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt16x16, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int16x32.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt16x32, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int32x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt32x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int32x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int32x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt32x16, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int64x2.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt64x2, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int64x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int64x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedInt64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint16x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint16x8, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint16x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint16x16, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint16x32.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint16x32, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint32x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x16.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint32x16, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x2.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint64x2, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x4.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x8.ShiftAllLeftConcatMasked", opLen3Imm8(ssa.OpShiftAllLeftConcatMaskedUint64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int16x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.ShiftAllLeftMasked", opLen3(ssa.OpShiftAllLeftMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int16x8.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int16x16.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int16x32.ShiftAllRight", opLen2(ssa.OpShiftAllRightInt16x32, types.TypeVec512), sys.AMD64)
@@ -1556,42 +884,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint64x2, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Uint64x4.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint64x4, types.TypeVec256, 0), sys.AMD64)
        addF(simdPackage, "Uint64x8.ShiftAllRightConcat", opLen2Imm8(ssa.OpShiftAllRightConcatUint64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int16x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt16x8, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int16x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt16x16, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int16x32.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt16x32, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int32x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt32x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int32x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int32x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt32x16, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int64x2.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt64x2, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int64x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int64x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedInt64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint16x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint16x8, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint16x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint16x16, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint16x32.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint16x32, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint32x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x16.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint32x16, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x2.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint64x2, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x4.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x8.ShiftAllRightConcatMasked", opLen3Imm8(ssa.OpShiftAllRightConcatMaskedUint64x8, types.TypeVec512, 0), sys.AMD64)
-       addF(simdPackage, "Int16x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.ShiftAllRightMasked", opLen3(ssa.OpShiftAllRightMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int16x8.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int16x16.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int16x32.ShiftLeft", opLen2(ssa.OpShiftLeftInt16x32, types.TypeVec512), sys.AMD64)
@@ -1628,42 +920,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.ShiftLeftConcat", opLen3(ssa.OpShiftLeftConcatUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.ShiftLeftConcatMasked", opLen4(ssa.OpShiftLeftConcatMaskedUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.ShiftLeftMasked", opLen3(ssa.OpShiftLeftMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int16x8.ShiftRight", opLen2(ssa.OpShiftRightInt16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int16x16.ShiftRight", opLen2(ssa.OpShiftRightInt16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int16x32.ShiftRight", opLen2(ssa.OpShiftRightInt16x32, types.TypeVec512), sys.AMD64)
@@ -1700,54 +956,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.ShiftRightConcat", opLen3(ssa.OpShiftRightConcatUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.ShiftRightConcatMasked", opLen4(ssa.OpShiftRightConcatMaskedUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.ShiftRightMasked", opLen3(ssa.OpShiftRightMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Sqrt", opLen1(ssa.OpSqrtFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Sqrt", opLen1(ssa.OpSqrtFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Sqrt", opLen1(ssa.OpSqrtFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.Sqrt", opLen1(ssa.OpSqrtFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.Sqrt", opLen1(ssa.OpSqrtFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.Sqrt", opLen1(ssa.OpSqrtFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.SqrtMasked", opLen2(ssa.OpSqrtMaskedFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Sub", opLen2(ssa.OpSubFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Sub", opLen2(ssa.OpSubFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Sub", opLen2(ssa.OpSubFloat32x16, types.TypeVec512), sys.AMD64)
@@ -1778,36 +992,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.Sub", opLen2(ssa.OpSubUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.Sub", opLen2(ssa.OpSubUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.Sub", opLen2(ssa.OpSubUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.SubMasked", opLen3(ssa.OpSubMaskedFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.SubMasked", opLen3(ssa.OpSubMaskedFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x16.SubMasked", opLen3(ssa.OpSubMaskedFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.SubMasked", opLen3(ssa.OpSubMaskedFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.SubMasked", opLen3(ssa.OpSubMaskedFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.SubMasked", opLen3(ssa.OpSubMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.SubMasked", opLen3(ssa.OpSubMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.SubMasked", opLen3(ssa.OpSubMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.SubMasked", opLen3(ssa.OpSubMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.SubMasked", opLen3(ssa.OpSubMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.SubMasked", opLen3(ssa.OpSubMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.SubMasked", opLen3(ssa.OpSubMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.SubMasked", opLen3(ssa.OpSubMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.SubMasked", opLen3(ssa.OpSubMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.SubMasked", opLen3(ssa.OpSubMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.SubMasked", opLen3(ssa.OpSubMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.SubMasked", opLen3(ssa.OpSubMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.SubMasked", opLen3(ssa.OpSubMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.SubMasked", opLen3(ssa.OpSubMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.SubMasked", opLen3(ssa.OpSubMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.SubMasked", opLen3(ssa.OpSubMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.SubMasked", opLen3(ssa.OpSubMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.SubMasked", opLen3(ssa.OpSubMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.SubMasked", opLen3(ssa.OpSubMaskedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.SubMasked", opLen3(ssa.OpSubMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.SubMasked", opLen3(ssa.OpSubMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.SubMasked", opLen3(ssa.OpSubMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.SubMasked", opLen3(ssa.OpSubMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.SubMasked", opLen3(ssa.OpSubMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.SubMasked", opLen3(ssa.OpSubMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.SubPairs", opLen2(ssa.OpSubPairsFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.SubPairs", opLen2(ssa.OpSubPairsFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.SubPairs", opLen2(ssa.OpSubPairsFloat64x2, types.TypeVec128), sys.AMD64)
@@ -1834,18 +1018,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x8.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint16x16.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint16x32.SubSaturated", opLen2(ssa.OpSubSaturatedUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.SubSaturatedMasked", opLen3(ssa.OpSubSaturatedMaskedUint16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Trunc", opLen1(ssa.OpTruncFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Trunc", opLen1(ssa.OpTruncFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.Trunc", opLen1(ssa.OpTruncFloat64x2, types.TypeVec128), sys.AMD64)
@@ -1856,24 +1028,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.TruncScaled", opLen1Imm8(ssa.OpTruncScaledFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.TruncScaledMasked", opLen2Imm8(ssa.OpTruncScaledMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float32x4.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x4, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float32x8.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x8, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float32x16.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat32x16, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float64x2.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.TruncScaledResidue", opLen1Imm8(ssa.OpTruncScaledResidueFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.TruncScaledResidueMasked", opLen2Imm8(ssa.OpTruncScaledResidueMaskedFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Int8x16.Xor", opLen2(ssa.OpXorInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.Xor", opLen2(ssa.OpXorInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.Xor", opLen2(ssa.OpXorInt8x64, types.TypeVec512), sys.AMD64)
@@ -1898,18 +1058,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.Xor", opLen2(ssa.OpXorUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.Xor", opLen2(ssa.OpXorUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.Xor", opLen2(ssa.OpXorUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.XorMasked", opLen3(ssa.OpXorMaskedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.XorMasked", opLen3(ssa.OpXorMaskedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.XorMasked", opLen3(ssa.OpXorMaskedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.XorMasked", opLen3(ssa.OpXorMaskedInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.XorMasked", opLen3(ssa.OpXorMaskedInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.XorMasked", opLen3(ssa.OpXorMaskedInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.XorMasked", opLen3(ssa.OpXorMaskedUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.XorMasked", opLen3(ssa.OpXorMaskedUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.XorMasked", opLen3(ssa.OpXorMaskedUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.XorMasked", opLen3(ssa.OpXorMaskedUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.XorMasked", opLen3(ssa.OpXorMaskedUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.XorMasked", opLen3(ssa.OpXorMaskedUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.blend", opLen3(ssa.OpblendInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.blend", opLen3(ssa.OpblendInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.blendMasked", opLen3(ssa.OpblendMaskedInt8x64, types.TypeVec512), sys.AMD64)
index 22decb9d7e69c9dbaaebdbd5d47b838ae273aafc..4044addd8c16ad6ec8f8e5ead86d8a7d6b72d63a 100644 (file)
@@ -11,6 +11,7 @@ import (
        "slices"
        "strconv"
        "strings"
+       "unicode"
 
        "simd/_gen/unify"
 )
@@ -100,6 +101,11 @@ func (o *Operation) DecodeUnified(v *unify.Value) error {
        o.Documentation = regexp.MustCompile(`\bNAME\b`).ReplaceAllString(o.Documentation, o.Go)
        if isMasked {
                o.Documentation += "\n//\n// This operation is applied selectively under a write mask."
+               if unicode.IsUpper([]rune(o.Go)[0]) {
+                       trueVal := "true"
+                       o.NoGenericOps = &trueVal
+                       o.NoTypes = &trueVal
+               }
        }
 
        o.In = append(o.rawOperation.In, o.rawOperation.InVariant...)
index 7fd20cf5d79f2ae572dc3616d16da35313895687..f8526d27e989d70574a18618fbd7f626dd7f638e 100644 (file)
@@ -15,44 +15,6 @@ import (
 // from > and =
 var comparisonFixed bool = simd.HasAVX512()
 
-func TestLessMasked(t *testing.T) {
-       if simd.HasAVX512() {
-               testFloat32x4CompareMasked(t, simd.Float32x4.LessMasked, lessSlice[float32])
-               testFloat32x8CompareMasked(t, simd.Float32x8.LessMasked, lessSlice[float32])
-               testFloat64x2CompareMasked(t, simd.Float64x2.LessMasked, lessSlice[float64])
-               testFloat64x4CompareMasked(t, simd.Float64x4.LessMasked, lessSlice[float64])
-
-               testInt16x16CompareMasked(t, simd.Int16x16.LessMasked, lessSlice[int16])
-               testInt16x8CompareMasked(t, simd.Int16x8.LessMasked, lessSlice[int16])
-               testInt32x4CompareMasked(t, simd.Int32x4.LessMasked, lessSlice[int32])
-               testInt32x8CompareMasked(t, simd.Int32x8.LessMasked, lessSlice[int32])
-               testInt64x2CompareMasked(t, simd.Int64x2.LessMasked, lessSlice[int64])
-               testInt64x4CompareMasked(t, simd.Int64x4.LessMasked, lessSlice[int64])
-               testInt8x16CompareMasked(t, simd.Int8x16.LessMasked, lessSlice[int8])
-               testInt8x32CompareMasked(t, simd.Int8x32.LessMasked, lessSlice[int8])
-
-               testUint16x16CompareMasked(t, simd.Uint16x16.LessMasked, lessSlice[uint16])
-               testUint16x8CompareMasked(t, simd.Uint16x8.LessMasked, lessSlice[uint16])
-               testUint32x4CompareMasked(t, simd.Uint32x4.LessMasked, lessSlice[uint32])
-               testUint32x8CompareMasked(t, simd.Uint32x8.LessMasked, lessSlice[uint32])
-               testUint64x2CompareMasked(t, simd.Uint64x2.LessMasked, lessSlice[uint64])
-               testUint64x4CompareMasked(t, simd.Uint64x4.LessMasked, lessSlice[uint64])
-               testUint8x16CompareMasked(t, simd.Uint8x16.LessMasked, lessSlice[uint8])
-               testUint8x32CompareMasked(t, simd.Uint8x32.LessMasked, lessSlice[uint8])
-
-               testFloat32x16CompareMasked(t, simd.Float32x16.LessMasked, lessSlice[float32])
-               testFloat64x8CompareMasked(t, simd.Float64x8.LessMasked, lessSlice[float64])
-               testInt8x64CompareMasked(t, simd.Int8x64.LessMasked, lessSlice[int8])
-               testInt16x32CompareMasked(t, simd.Int16x32.LessMasked, lessSlice[int16])
-               testInt32x16CompareMasked(t, simd.Int32x16.LessMasked, lessSlice[int32])
-               testInt64x8CompareMasked(t, simd.Int64x8.LessMasked, lessSlice[int64])
-               testUint8x64CompareMasked(t, simd.Uint8x64.LessMasked, lessSlice[uint8])
-               testUint16x32CompareMasked(t, simd.Uint16x32.LessMasked, lessSlice[uint16])
-               testUint32x16CompareMasked(t, simd.Uint32x16.LessMasked, lessSlice[uint32])
-               testUint64x8CompareMasked(t, simd.Uint64x8.LessMasked, lessSlice[uint64])
-       }
-}
-
 func TestLess(t *testing.T) {
        testFloat32x4Compare(t, simd.Float32x4.Less, lessSlice[float32])
        testFloat32x8Compare(t, simd.Float32x8.Less, lessSlice[float32])
index d6fcd065bbbad0776f5523be585e21e31d0b058b..76bbf738cb173625b395cd1cbe5c83fe972f1216 100644 (file)
@@ -66,92 +66,6 @@ func (x Int64x4) Abs() Int64x4
 // Asm: VPABSQ, CPU Feature: AVX512
 func (x Int64x8) Abs() Int64x8
 
-/* AbsMasked */
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSB, CPU Feature: AVX512
-func (x Int8x16) AbsMasked(mask Mask8x16) Int8x16
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSB, CPU Feature: AVX512
-func (x Int8x32) AbsMasked(mask Mask8x32) Int8x32
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSB, CPU Feature: AVX512
-func (x Int8x64) AbsMasked(mask Mask8x64) Int8x64
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSW, CPU Feature: AVX512
-func (x Int16x8) AbsMasked(mask Mask16x8) Int16x8
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSW, CPU Feature: AVX512
-func (x Int16x16) AbsMasked(mask Mask16x16) Int16x16
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSW, CPU Feature: AVX512
-func (x Int16x32) AbsMasked(mask Mask16x32) Int16x32
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSD, CPU Feature: AVX512
-func (x Int32x4) AbsMasked(mask Mask32x4) Int32x4
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSD, CPU Feature: AVX512
-func (x Int32x8) AbsMasked(mask Mask32x8) Int32x8
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSD, CPU Feature: AVX512
-func (x Int32x16) AbsMasked(mask Mask32x16) Int32x16
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSQ, CPU Feature: AVX512
-func (x Int64x2) AbsMasked(mask Mask64x2) Int64x2
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSQ, CPU Feature: AVX512
-func (x Int64x4) AbsMasked(mask Mask64x4) Int64x4
-
-// AbsMasked computes the absolute value of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPABSQ, CPU Feature: AVX512
-func (x Int64x8) AbsMasked(mask Mask64x8) Int64x8
-
 /* Add */
 
 // Add adds corresponding elements of two vectors.
@@ -321,29 +235,6 @@ func (x Int32x8) AddDotProdPairsSaturated(y Int16x16, z Int16x16) Int32x8
 // Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
 func (x Int32x16) AddDotProdPairsSaturated(y Int16x32, z Int16x32) Int32x16
 
-/* AddDotProdPairsSaturatedMasked */
-
-// AddDotProdPairsSaturatedMasked performs dot products on pairs of elements of y and z and then adds x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x4) AddDotProdPairsSaturatedMasked(y Int16x8, z Int16x8, mask Mask32x4) Int32x4
-
-// AddDotProdPairsSaturatedMasked performs dot products on pairs of elements of y and z and then adds x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x8) AddDotProdPairsSaturatedMasked(y Int16x16, z Int16x16, mask Mask32x8) Int32x8
-
-// AddDotProdPairsSaturatedMasked performs dot products on pairs of elements of y and z and then adds x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x16) AddDotProdPairsSaturatedMasked(y Int16x32, z Int16x32, mask Mask32x16) Int32x16
-
 /* AddDotProdQuadruple */
 
 // AddDotProdQuadruple performs dot products on groups of 4 elements of x and y and then adds z.
@@ -361,29 +252,6 @@ func (x Int8x32) AddDotProdQuadruple(y Uint8x32, z Int32x8) Int32x8
 // Asm: VPDPBUSD, CPU Feature: AVX512VNNI
 func (x Int8x64) AddDotProdQuadruple(y Uint8x64, z Int32x16) Int32x16
 
-/* AddDotProdQuadrupleMasked */
-
-// AddDotProdQuadrupleMasked performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int8x16) AddDotProdQuadrupleMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4
-
-// AddDotProdQuadrupleMasked performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int8x32) AddDotProdQuadrupleMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8
-
-// AddDotProdQuadrupleMasked performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int8x64) AddDotProdQuadrupleMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16
-
 /* AddDotProdQuadrupleSaturated */
 
 // AddDotProdQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y and then adds z.
@@ -401,377 +269,142 @@ func (x Int8x32) AddDotProdQuadrupleSaturated(y Uint8x32, z Int32x8) Int32x8
 // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
 func (x Int8x64) AddDotProdQuadrupleSaturated(y Uint8x64, z Int32x16) Int32x16
 
-/* AddDotProdQuadrupleSaturatedMasked */
+/* AddPairs */
 
-// AddDotProdQuadrupleSaturatedMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x16) AddDotProdQuadrupleSaturatedMasked(y Uint8x16, z Int32x4, mask Mask32x4) Int32x4
+// Asm: VHADDPS, CPU Feature: AVX
+func (x Float32x4) AddPairs(y Float32x4) Float32x4
 
-// AddDotProdQuadrupleSaturatedMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x32) AddDotProdQuadrupleSaturatedMasked(y Uint8x32, z Int32x8, mask Mask32x8) Int32x8
+// Asm: VHADDPS, CPU Feature: AVX
+func (x Float32x8) AddPairs(y Float32x8) Float32x8
 
-// AddDotProdQuadrupleSaturatedMasked multiplies performs dot products on groups of 4 elements of x and y and then adds z.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x64) AddDotProdQuadrupleSaturatedMasked(y Uint8x64, z Int32x16, mask Mask32x16) Int32x16
-
-/* AddMasked */
+// Asm: VHADDPD, CPU Feature: AVX
+func (x Float64x2) AddPairs(y Float64x2) Float64x2
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VADDPS, CPU Feature: AVX512
-func (x Float32x4) AddMasked(y Float32x4, mask Mask32x4) Float32x4
+// Asm: VHADDPD, CPU Feature: AVX
+func (x Float64x4) AddPairs(y Float64x4) Float64x4
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VADDPS, CPU Feature: AVX512
-func (x Float32x8) AddMasked(y Float32x8, mask Mask32x8) Float32x8
+// Asm: VPHADDW, CPU Feature: AVX
+func (x Int16x8) AddPairs(y Int16x8) Int16x8
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VADDPS, CPU Feature: AVX512
-func (x Float32x16) AddMasked(y Float32x16, mask Mask32x16) Float32x16
+// Asm: VPHADDW, CPU Feature: AVX2
+func (x Int16x16) AddPairs(y Int16x16) Int16x16
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VADDPD, CPU Feature: AVX512
-func (x Float64x2) AddMasked(y Float64x2, mask Mask64x2) Float64x2
+// Asm: VPHADDD, CPU Feature: AVX
+func (x Int32x4) AddPairs(y Int32x4) Int32x4
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VADDPD, CPU Feature: AVX512
-func (x Float64x4) AddMasked(y Float64x4, mask Mask64x4) Float64x4
+// Asm: VPHADDD, CPU Feature: AVX2
+func (x Int32x8) AddPairs(y Int32x8) Int32x8
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VADDPD, CPU Feature: AVX512
-func (x Float64x8) AddMasked(y Float64x8, mask Mask64x8) Float64x8
+// Asm: VPHADDW, CPU Feature: AVX
+func (x Uint16x8) AddPairs(y Uint16x8) Uint16x8
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VPADDB, CPU Feature: AVX512
-func (x Int8x16) AddMasked(y Int8x16, mask Mask8x16) Int8x16
+// Asm: VPHADDW, CPU Feature: AVX2
+func (x Uint16x16) AddPairs(y Uint16x16) Uint16x16
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VPADDB, CPU Feature: AVX512
-func (x Int8x32) AddMasked(y Int8x32, mask Mask8x32) Int8x32
+// Asm: VPHADDD, CPU Feature: AVX
+func (x Uint32x4) AddPairs(y Uint32x4) Uint32x4
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairs horizontally adds adjacent pairs of elements.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VPADDB, CPU Feature: AVX512
-func (x Int8x64) AddMasked(y Int8x64, mask Mask8x64) Int8x64
+// Asm: VPHADDD, CPU Feature: AVX2
+func (x Uint32x8) AddPairs(y Uint32x8) Uint32x8
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDW, CPU Feature: AVX512
-func (x Int16x8) AddMasked(y Int16x8, mask Mask16x8) Int16x8
+/* AddPairsSaturated */
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VPADDW, CPU Feature: AVX512
-func (x Int16x16) AddMasked(y Int16x16, mask Mask16x16) Int16x16
+// Asm: VPHADDSW, CPU Feature: AVX
+func (x Int16x8) AddPairsSaturated(y Int16x8) Int16x8
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
+// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
 //
-// Asm: VPADDW, CPU Feature: AVX512
-func (x Int16x32) AddMasked(y Int16x32, mask Mask16x32) Int16x32
+// Asm: VPHADDSW, CPU Feature: AVX2
+func (x Int16x16) AddPairsSaturated(y Int16x16) Int16x16
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDD, CPU Feature: AVX512
-func (x Int32x4) AddMasked(y Int32x4, mask Mask32x4) Int32x4
+/* AddSaturated */
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDD, CPU Feature: AVX512
-func (x Int32x8) AddMasked(y Int32x8, mask Mask32x8) Int32x8
+// Asm: VPADDSB, CPU Feature: AVX
+func (x Int8x16) AddSaturated(y Int8x16) Int8x16
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDD, CPU Feature: AVX512
-func (x Int32x16) AddMasked(y Int32x16, mask Mask32x16) Int32x16
+// Asm: VPADDSB, CPU Feature: AVX2
+func (x Int8x32) AddSaturated(y Int8x32) Int8x32
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDQ, CPU Feature: AVX512
-func (x Int64x2) AddMasked(y Int64x2, mask Mask64x2) Int64x2
+// Asm: VPADDSB, CPU Feature: AVX512
+func (x Int8x64) AddSaturated(y Int8x64) Int8x64
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDQ, CPU Feature: AVX512
-func (x Int64x4) AddMasked(y Int64x4, mask Mask64x4) Int64x4
+// Asm: VPADDSW, CPU Feature: AVX
+func (x Int16x8) AddSaturated(y Int16x8) Int16x8
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDQ, CPU Feature: AVX512
-func (x Int64x8) AddMasked(y Int64x8, mask Mask64x8) Int64x8
+// Asm: VPADDSW, CPU Feature: AVX2
+func (x Int16x16) AddSaturated(y Int16x16) Int16x16
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDB, CPU Feature: AVX512
-func (x Uint8x16) AddMasked(y Uint8x16, mask Mask8x16) Uint8x16
+// Asm: VPADDSW, CPU Feature: AVX512
+func (x Int16x32) AddSaturated(y Int16x32) Int16x32
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDB, CPU Feature: AVX512
-func (x Uint8x32) AddMasked(y Uint8x32, mask Mask8x32) Uint8x32
+// Asm: VPADDUSB, CPU Feature: AVX
+func (x Uint8x16) AddSaturated(y Uint8x16) Uint8x16
 
-// AddMasked adds corresponding elements of two vectors.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPADDUSB, CPU Feature: AVX2
+func (x Uint8x32) AddSaturated(y Uint8x32) Uint8x32
+
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
-// Asm: VPADDB, CPU Feature: AVX512
-func (x Uint8x64) AddMasked(y Uint8x64, mask Mask8x64) Uint8x64
+// Asm: VPADDUSB, CPU Feature: AVX512
+func (x Uint8x64) AddSaturated(y Uint8x64) Uint8x64
 
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDW, CPU Feature: AVX512
-func (x Uint16x8) AddMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDW, CPU Feature: AVX512
-func (x Uint16x16) AddMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDW, CPU Feature: AVX512
-func (x Uint16x32) AddMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDD, CPU Feature: AVX512
-func (x Uint32x4) AddMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDD, CPU Feature: AVX512
-func (x Uint32x8) AddMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDD, CPU Feature: AVX512
-func (x Uint32x16) AddMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDQ, CPU Feature: AVX512
-func (x Uint64x2) AddMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDQ, CPU Feature: AVX512
-func (x Uint64x4) AddMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// AddMasked adds corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDQ, CPU Feature: AVX512
-func (x Uint64x8) AddMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
-/* AddPairs */
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VHADDPS, CPU Feature: AVX
-func (x Float32x4) AddPairs(y Float32x4) Float32x4
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VHADDPS, CPU Feature: AVX
-func (x Float32x8) AddPairs(y Float32x8) Float32x8
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VHADDPD, CPU Feature: AVX
-func (x Float64x2) AddPairs(y Float64x2) Float64x2
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VHADDPD, CPU Feature: AVX
-func (x Float64x4) AddPairs(y Float64x4) Float64x4
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDW, CPU Feature: AVX
-func (x Int16x8) AddPairs(y Int16x8) Int16x8
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDW, CPU Feature: AVX2
-func (x Int16x16) AddPairs(y Int16x16) Int16x16
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDD, CPU Feature: AVX
-func (x Int32x4) AddPairs(y Int32x4) Int32x4
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDD, CPU Feature: AVX2
-func (x Int32x8) AddPairs(y Int32x8) Int32x8
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDW, CPU Feature: AVX
-func (x Uint16x8) AddPairs(y Uint16x8) Uint16x8
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDW, CPU Feature: AVX2
-func (x Uint16x16) AddPairs(y Uint16x16) Uint16x16
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDD, CPU Feature: AVX
-func (x Uint32x4) AddPairs(y Uint32x4) Uint32x4
-
-// AddPairs horizontally adds adjacent pairs of elements.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDD, CPU Feature: AVX2
-func (x Uint32x8) AddPairs(y Uint32x8) Uint32x8
-
-/* AddPairsSaturated */
-
-// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDSW, CPU Feature: AVX
-func (x Int16x8) AddPairsSaturated(y Int16x8) Int16x8
-
-// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation.
-// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...].
-//
-// Asm: VPHADDSW, CPU Feature: AVX2
-func (x Int16x16) AddPairsSaturated(y Int16x16) Int16x16
-
-/* AddSaturated */
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSB, CPU Feature: AVX
-func (x Int8x16) AddSaturated(y Int8x16) Int8x16
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSB, CPU Feature: AVX2
-func (x Int8x32) AddSaturated(y Int8x32) Int8x32
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSB, CPU Feature: AVX512
-func (x Int8x64) AddSaturated(y Int8x64) Int8x64
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSW, CPU Feature: AVX
-func (x Int16x8) AddSaturated(y Int16x8) Int16x8
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSW, CPU Feature: AVX2
-func (x Int16x16) AddSaturated(y Int16x16) Int16x16
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDSW, CPU Feature: AVX512
-func (x Int16x32) AddSaturated(y Int16x32) Int16x32
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDUSB, CPU Feature: AVX
-func (x Uint8x16) AddSaturated(y Uint8x16) Uint8x16
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDUSB, CPU Feature: AVX2
-func (x Uint8x32) AddSaturated(y Uint8x32) Uint8x32
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
-//
-// Asm: VPADDUSB, CPU Feature: AVX512
-func (x Uint8x64) AddSaturated(y Uint8x64) Uint8x64
-
-// AddSaturated adds corresponding elements of two vectors with saturation.
+// AddSaturated adds corresponding elements of two vectors with saturation.
 //
 // Asm: VPADDUSW, CPU Feature: AVX
 func (x Uint16x8) AddSaturated(y Uint16x8) Uint16x8
@@ -786,92 +419,6 @@ func (x Uint16x16) AddSaturated(y Uint16x16) Uint16x16
 // Asm: VPADDUSW, CPU Feature: AVX512
 func (x Uint16x32) AddSaturated(y Uint16x32) Uint16x32
 
-/* AddSaturatedMasked */
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDSB, CPU Feature: AVX512
-func (x Int8x16) AddSaturatedMasked(y Int8x16, mask Mask8x16) Int8x16
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDSB, CPU Feature: AVX512
-func (x Int8x32) AddSaturatedMasked(y Int8x32, mask Mask8x32) Int8x32
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDSB, CPU Feature: AVX512
-func (x Int8x64) AddSaturatedMasked(y Int8x64, mask Mask8x64) Int8x64
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDSW, CPU Feature: AVX512
-func (x Int16x8) AddSaturatedMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDSW, CPU Feature: AVX512
-func (x Int16x16) AddSaturatedMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDSW, CPU Feature: AVX512
-func (x Int16x32) AddSaturatedMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDUSB, CPU Feature: AVX512
-func (x Uint8x16) AddSaturatedMasked(y Uint8x16, mask Mask8x16) Uint8x16
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDUSB, CPU Feature: AVX512
-func (x Uint8x32) AddSaturatedMasked(y Uint8x32, mask Mask8x32) Uint8x32
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDUSB, CPU Feature: AVX512
-func (x Uint8x64) AddSaturatedMasked(y Uint8x64, mask Mask8x64) Uint8x64
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDUSW, CPU Feature: AVX512
-func (x Uint16x8) AddSaturatedMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDUSW, CPU Feature: AVX512
-func (x Uint16x16) AddSaturatedMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// AddSaturatedMasked adds corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPADDUSW, CPU Feature: AVX512
-func (x Uint16x32) AddSaturatedMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
 /* AddSub */
 
 // AddSub subtracts even elements and adds odd elements of two vectors.
@@ -1016,105 +563,19 @@ func (x Uint64x4) And(y Uint64x4) Uint64x4
 // Asm: VPANDQ, CPU Feature: AVX512
 func (x Uint64x8) And(y Uint64x8) Uint64x8
 
-/* AndMasked */
+/* AndNot */
 
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AndNot performs a bitwise x &^ y.
 //
-// Asm: VPANDD, CPU Feature: AVX512
-func (x Int32x4) AndMasked(y Int32x4, mask Mask32x4) Int32x4
+// Asm: VPANDN, CPU Feature: AVX
+func (x Int8x16) AndNot(y Int8x16) Int8x16
 
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
+// AndNot performs a bitwise x &^ y.
 //
-// Asm: VPANDD, CPU Feature: AVX512
-func (x Int32x8) AndMasked(y Int32x8, mask Mask32x8) Int32x8
+// Asm: VPANDN, CPU Feature: AVX2
+func (x Int8x32) AndNot(y Int8x32) Int8x32
 
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDD, CPU Feature: AVX512
-func (x Int32x16) AndMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDQ, CPU Feature: AVX512
-func (x Int64x2) AndMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDQ, CPU Feature: AVX512
-func (x Int64x4) AndMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDQ, CPU Feature: AVX512
-func (x Int64x8) AndMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDD, CPU Feature: AVX512
-func (x Uint32x4) AndMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDD, CPU Feature: AVX512
-func (x Uint32x8) AndMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDD, CPU Feature: AVX512
-func (x Uint32x16) AndMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDQ, CPU Feature: AVX512
-func (x Uint64x2) AndMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDQ, CPU Feature: AVX512
-func (x Uint64x4) AndMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// AndMasked performs a bitwise AND operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDQ, CPU Feature: AVX512
-func (x Uint64x8) AndMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
-/* AndNot */
-
-// AndNot performs a bitwise x &^ y.
-//
-// Asm: VPANDN, CPU Feature: AVX
-func (x Int8x16) AndNot(y Int8x16) Int8x16
-
-// AndNot performs a bitwise x &^ y.
-//
-// Asm: VPANDN, CPU Feature: AVX2
-func (x Int8x32) AndNot(y Int8x32) Int8x32
-
-// AndNot performs a bitwise x &^ y.
+// AndNot performs a bitwise x &^ y.
 //
 // Asm: VPANDND, CPU Feature: AVX512
 func (x Int8x64) AndNot(y Int8x64) Int8x64
@@ -1224,92 +685,6 @@ func (x Uint64x4) AndNot(y Uint64x4) Uint64x4
 // Asm: VPANDNQ, CPU Feature: AVX512
 func (x Uint64x8) AndNot(y Uint64x8) Uint64x8
 
-/* AndNotMasked */
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDND, CPU Feature: AVX512
-func (x Int32x4) AndNotMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDND, CPU Feature: AVX512
-func (x Int32x8) AndNotMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDND, CPU Feature: AVX512
-func (x Int32x16) AndNotMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDNQ, CPU Feature: AVX512
-func (x Int64x2) AndNotMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDNQ, CPU Feature: AVX512
-func (x Int64x4) AndNotMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDNQ, CPU Feature: AVX512
-func (x Int64x8) AndNotMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDND, CPU Feature: AVX512
-func (x Uint32x4) AndNotMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDND, CPU Feature: AVX512
-func (x Uint32x8) AndNotMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDND, CPU Feature: AVX512
-func (x Uint32x16) AndNotMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDNQ, CPU Feature: AVX512
-func (x Uint64x2) AndNotMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDNQ, CPU Feature: AVX512
-func (x Uint64x4) AndNotMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// AndNotMasked performs a bitwise x &^ y.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPANDNQ, CPU Feature: AVX512
-func (x Uint64x8) AndNotMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
 /* Average */
 
 // Average computes the rounded average of corresponding elements.
@@ -1342,50 +717,6 @@ func (x Uint16x16) Average(y Uint16x16) Uint16x16
 // Asm: VPAVGW, CPU Feature: AVX512
 func (x Uint16x32) Average(y Uint16x32) Uint16x32
 
-/* AverageMasked */
-
-// AverageMasked computes the rounded average of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPAVGB, CPU Feature: AVX512
-func (x Uint8x16) AverageMasked(y Uint8x16, mask Mask8x16) Uint8x16
-
-// AverageMasked computes the rounded average of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPAVGB, CPU Feature: AVX512
-func (x Uint8x32) AverageMasked(y Uint8x32, mask Mask8x32) Uint8x32
-
-// AverageMasked computes the rounded average of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPAVGB, CPU Feature: AVX512
-func (x Uint8x64) AverageMasked(y Uint8x64, mask Mask8x64) Uint8x64
-
-// AverageMasked computes the rounded average of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPAVGW, CPU Feature: AVX512
-func (x Uint16x8) AverageMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// AverageMasked computes the rounded average of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPAVGW, CPU Feature: AVX512
-func (x Uint16x16) AverageMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// AverageMasked computes the rounded average of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPAVGW, CPU Feature: AVX512
-func (x Uint16x32) AverageMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
 /* Broadcast128 */
 
 // Broadcast128 copies element zero of its (128-bit) input to all elements of
@@ -1448,88 +779,6 @@ func (x Uint32x4) Broadcast128() Uint32x4
 // Asm: VPBROADCASTQ, CPU Feature: AVX2
 func (x Uint64x2) Broadcast128() Uint64x2
 
-/* Broadcast128Masked */
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VBROADCASTSS, CPU Feature: AVX512
-func (x Float32x4) Broadcast128Masked(mask Mask32x4) Float32x4
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Float64x2) Broadcast128Masked(mask Mask64x2) Float64x2
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Int8x16) Broadcast128Masked(mask Mask8x16) Int8x16
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Int16x8) Broadcast128Masked(mask Mask16x8) Int16x8
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Int32x4) Broadcast128Masked(mask Mask32x4) Int32x4
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Int64x2) Broadcast128Masked(mask Mask64x2) Int64x2
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Uint8x16) Broadcast128Masked(mask Mask8x16) Uint8x16
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Uint16x8) Broadcast128Masked(mask Mask16x8) Uint16x8
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Uint32x4) Broadcast128Masked(mask Mask32x4) Uint32x4
-
-// Broadcast128Masked copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Uint64x2) Broadcast128Masked(mask Mask64x2) Uint64x2
-
 /* Broadcast256 */
 
 // Broadcast256 copies element zero of its (128-bit) input to all elements of
@@ -1592,128 +841,46 @@ func (x Uint32x4) Broadcast256() Uint32x8
 // Asm: VPBROADCASTQ, CPU Feature: AVX2
 func (x Uint64x2) Broadcast256() Uint64x4
 
-/* Broadcast256Masked */
+/* Broadcast512 */
 
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
 //
 // Asm: VBROADCASTSS, CPU Feature: AVX512
-func (x Float32x4) Broadcast256Masked(mask Mask32x4) Float32x8
+func (x Float32x4) Broadcast512() Float32x16
 
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
 //
 // Asm: VBROADCASTSD, CPU Feature: AVX512
-func (x Float64x2) Broadcast256Masked(mask Mask64x2) Float64x4
+func (x Float64x2) Broadcast512() Float64x8
 
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
 //
 // Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Int8x16) Broadcast256Masked(mask Mask8x16) Int8x32
+func (x Int8x16) Broadcast512() Int8x64
 
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
 //
 // Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Int16x8) Broadcast256Masked(mask Mask16x8) Int16x16
+func (x Int16x8) Broadcast512() Int16x32
 
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
 //
 // Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Int32x4) Broadcast256Masked(mask Mask32x4) Int32x8
+func (x Int32x4) Broadcast512() Int32x16
 
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
 //
 // Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Int64x2) Broadcast256Masked(mask Mask64x2) Int64x4
+func (x Int64x2) Broadcast512() Int64x8
 
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Uint8x16) Broadcast256Masked(mask Mask8x16) Uint8x32
-
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Uint16x8) Broadcast256Masked(mask Mask16x8) Uint16x16
-
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Uint32x4) Broadcast256Masked(mask Mask32x4) Uint32x8
-
-// Broadcast256Masked copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Uint64x2) Broadcast256Masked(mask Mask64x2) Uint64x4
-
-/* Broadcast512 */
-
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// Asm: VBROADCASTSS, CPU Feature: AVX512
-func (x Float32x4) Broadcast512() Float32x16
-
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// Asm: VBROADCASTSD, CPU Feature: AVX512
-func (x Float64x2) Broadcast512() Float64x8
-
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Int8x16) Broadcast512() Int8x64
-
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Int16x8) Broadcast512() Int16x32
-
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Int32x4) Broadcast512() Int32x16
-
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Int64x2) Broadcast512() Int64x8
-
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast512 copies element zero of its (128-bit) input to all elements of
+// the 512-bit output vector.
 //
 // Asm: VPBROADCASTB, CPU Feature: AVX512
 func (x Uint8x16) Broadcast512() Uint8x64
@@ -1736,88 +903,6 @@ func (x Uint32x4) Broadcast512() Uint32x16
 // Asm: VPBROADCASTQ, CPU Feature: AVX512
 func (x Uint64x2) Broadcast512() Uint64x8
 
-/* Broadcast512Masked */
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VBROADCASTSS, CPU Feature: AVX512
-func (x Float32x4) Broadcast512Masked(mask Mask32x4) Float32x16
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VBROADCASTSD, CPU Feature: AVX512
-func (x Float64x2) Broadcast512Masked(mask Mask64x2) Float64x8
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Int8x16) Broadcast512Masked(mask Mask8x16) Int8x64
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Int16x8) Broadcast512Masked(mask Mask16x8) Int16x32
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Int32x4) Broadcast512Masked(mask Mask32x4) Int32x16
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Int64x2) Broadcast512Masked(mask Mask64x2) Int64x8
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Uint8x16) Broadcast512Masked(mask Mask8x16) Uint8x64
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Uint16x8) Broadcast512Masked(mask Mask16x8) Uint16x32
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Uint32x4) Broadcast512Masked(mask Mask32x4) Uint32x16
-
-// Broadcast512Masked copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Uint64x2) Broadcast512Masked(mask Mask64x2) Uint64x8
-
 /* Ceil */
 
 // Ceil rounds elements up to the nearest integer.
@@ -1884,62 +969,6 @@ func (x Float64x4) CeilScaled(prec uint8) Float64x4
 // Asm: VRNDSCALEPD, CPU Feature: AVX512
 func (x Float64x8) CeilScaled(prec uint8) Float64x8
 
-/* CeilScaledMasked */
-
-// CeilScaledMasked rounds elements up with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x4) CeilScaledMasked(prec uint8, mask Mask32x4) Float32x4
-
-// CeilScaledMasked rounds elements up with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x8) CeilScaledMasked(prec uint8, mask Mask32x8) Float32x8
-
-// CeilScaledMasked rounds elements up with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x16) CeilScaledMasked(prec uint8, mask Mask32x16) Float32x16
-
-// CeilScaledMasked rounds elements up with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x2) CeilScaledMasked(prec uint8, mask Mask64x2) Float64x2
-
-// CeilScaledMasked rounds elements up with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x4) CeilScaledMasked(prec uint8, mask Mask64x4) Float64x4
-
-// CeilScaledMasked rounds elements up with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x8) CeilScaledMasked(prec uint8, mask Mask64x8) Float64x8
-
 /* CeilScaledResidue */
 
 // CeilScaledResidue computes the difference after ceiling with specified precision.
@@ -1984,62 +1013,6 @@ func (x Float64x4) CeilScaledResidue(prec uint8) Float64x4
 // Asm: VREDUCEPD, CPU Feature: AVX512
 func (x Float64x8) CeilScaledResidue(prec uint8) Float64x8
 
-/* CeilScaledResidueMasked */
-
-// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x4) CeilScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4
-
-// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x8) CeilScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8
-
-// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x16) CeilScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16
-
-// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x2) CeilScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2
-
-// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x4) CeilScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4
-
-// CeilScaledResidueMasked computes the difference after ceiling with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x8) CeilScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8
-
 /* Compress */
 
 // Compress performs a compression on vector x using mask by
@@ -2239,29 +1212,6 @@ func (x Float32x8) ConvertToInt32() Int32x8
 // Asm: VCVTTPS2DQ, CPU Feature: AVX512
 func (x Float32x16) ConvertToInt32() Int32x16
 
-/* ConvertToInt32Masked */
-
-// ConvertToInt32 converts element values to int32.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCVTTPS2DQ, CPU Feature: AVX512
-func (x Float32x4) ConvertToInt32Masked(mask Mask32x4) Int32x4
-
-// ConvertToInt32 converts element values to int32.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCVTTPS2DQ, CPU Feature: AVX512
-func (x Float32x8) ConvertToInt32Masked(mask Mask32x8) Int32x8
-
-// ConvertToInt32 converts element values to int32.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCVTTPS2DQ, CPU Feature: AVX512
-func (x Float32x16) ConvertToInt32Masked(mask Mask32x16) Int32x16
-
 /* ConvertToUint32 */
 
 // ConvertToUint32Masked converts element values to uint32.
@@ -2279,29 +1229,6 @@ func (x Float32x8) ConvertToUint32() Uint32x8
 // Asm: VCVTPS2UDQ, CPU Feature: AVX512
 func (x Float32x16) ConvertToUint32() Uint32x16
 
-/* ConvertToUint32Masked */
-
-// ConvertToUint32Masked converts element values to uint32.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCVTPS2UDQ, CPU Feature: AVX512
-func (x Float32x4) ConvertToUint32Masked(mask Mask32x4) Uint32x4
-
-// ConvertToUint32Masked converts element values to uint32.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCVTPS2UDQ, CPU Feature: AVX512
-func (x Float32x8) ConvertToUint32Masked(mask Mask32x8) Uint32x8
-
-// ConvertToUint32Masked converts element values to uint32.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCVTPS2UDQ, CPU Feature: AVX512
-func (x Float32x16) ConvertToUint32Masked(mask Mask32x16) Uint32x16
-
 /* CopySign */
 
 // CopySign returns the product of the first operand with -1, 0, or 1,
@@ -2372,57 +1299,13 @@ func (x Float64x4) Div(y Float64x4) Float64x4
 // Asm: VDIVPD, CPU Feature: AVX512
 func (x Float64x8) Div(y Float64x8) Float64x8
 
-/* DivMasked */
+/* DotProdPairs */
 
-// DivMasked divides elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// DotProdPairs multiplies the elements and add the pairs together,
+// yielding a vector of half as many elements with twice the input element size.
 //
-// Asm: VDIVPS, CPU Feature: AVX512
-func (x Float32x4) DivMasked(y Float32x4, mask Mask32x4) Float32x4
-
-// DivMasked divides elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VDIVPS, CPU Feature: AVX512
-func (x Float32x8) DivMasked(y Float32x8, mask Mask32x8) Float32x8
-
-// DivMasked divides elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VDIVPS, CPU Feature: AVX512
-func (x Float32x16) DivMasked(y Float32x16, mask Mask32x16) Float32x16
-
-// DivMasked divides elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VDIVPD, CPU Feature: AVX512
-func (x Float64x2) DivMasked(y Float64x2, mask Mask64x2) Float64x2
-
-// DivMasked divides elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VDIVPD, CPU Feature: AVX512
-func (x Float64x4) DivMasked(y Float64x4, mask Mask64x4) Float64x4
-
-// DivMasked divides elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VDIVPD, CPU Feature: AVX512
-func (x Float64x8) DivMasked(y Float64x8, mask Mask64x8) Float64x8
-
-/* DotProdPairs */
-
-// DotProdPairs multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// Asm: VPMADDWD, CPU Feature: AVX
-func (x Int16x8) DotProdPairs(y Int16x8) Int32x4
+// Asm: VPMADDWD, CPU Feature: AVX
+func (x Int16x8) DotProdPairs(y Int16x8) Int32x4
 
 // DotProdPairs multiplies the elements and add the pairs together,
 // yielding a vector of half as many elements with twice the input element size.
@@ -2436,32 +1319,6 @@ func (x Int16x16) DotProdPairs(y Int16x16) Int32x8
 // Asm: VPMADDWD, CPU Feature: AVX512
 func (x Int16x32) DotProdPairs(y Int16x32) Int32x16
 
-/* DotProdPairsMasked */
-
-// DotProdPairsMasked multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512
-func (x Int16x8) DotProdPairsMasked(y Int16x8, mask Mask16x8) Int32x4
-
-// DotProdPairsMasked multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512
-func (x Int16x16) DotProdPairsMasked(y Int16x16, mask Mask16x16) Int32x8
-
-// DotProdPairsMasked multiplies the elements and add the pairs together,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDWD, CPU Feature: AVX512
-func (x Int16x32) DotProdPairsMasked(y Int16x32, mask Mask16x32) Int32x16
-
 /* DotProdPairsSaturated */
 
 // DotProdPairsSaturated multiplies the elements and add the pairs together with saturation,
@@ -2482,32 +1339,6 @@ func (x Uint8x32) DotProdPairsSaturated(y Int8x32) Int16x16
 // Asm: VPMADDUBSW, CPU Feature: AVX512
 func (x Uint8x64) DotProdPairsSaturated(y Int8x64) Int16x32
 
-/* DotProdPairsSaturatedMasked */
-
-// DotProdPairsSaturatedMasked multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDUBSW, CPU Feature: AVX512
-func (x Uint8x16) DotProdPairsSaturatedMasked(y Int8x16, mask Mask16x8) Int16x8
-
-// DotProdPairsSaturatedMasked multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDUBSW, CPU Feature: AVX512
-func (x Uint8x32) DotProdPairsSaturatedMasked(y Int8x32, mask Mask16x16) Int16x16
-
-// DotProdPairsSaturatedMasked multiplies the elements and add the pairs together with saturation,
-// yielding a vector of half as many elements with twice the input element size.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMADDUBSW, CPU Feature: AVX512
-func (x Uint8x64) DotProdPairsSaturatedMasked(y Int8x64, mask Mask16x32) Int16x32
-
 /* Equal */
 
 // Equal compares for equality.
@@ -2660,218 +1491,6 @@ func (x Float64x4) Equal(y Float64x4) Mask64x4
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) Equal(y Float64x8) Mask64x8
 
-/* EqualMasked */
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x4) EqualMasked(y Float32x4, mask Mask32x4) Mask32x4
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x8) EqualMasked(y Float32x8, mask Mask32x8) Mask32x8
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) EqualMasked(y Float32x16, mask Mask32x16) Mask32x16
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x2) EqualMasked(y Float64x2, mask Mask64x2) Mask64x2
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x4) EqualMasked(y Float64x4, mask Mask64x4) Mask64x4
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) EqualMasked(y Float64x8, mask Mask64x8) Mask64x8
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) EqualMasked(y Int8x16, mask Mask8x16) Mask8x16
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) EqualMasked(y Int8x32, mask Mask8x32) Mask8x32
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) EqualMasked(y Int8x64, mask Mask8x64) Mask8x64
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) EqualMasked(y Int16x8, mask Mask16x8) Mask16x8
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) EqualMasked(y Int16x16, mask Mask16x16) Mask16x16
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) EqualMasked(y Int16x32, mask Mask16x32) Mask16x32
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) EqualMasked(y Int32x4, mask Mask32x4) Mask32x4
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) EqualMasked(y Int32x8, mask Mask32x8) Mask32x8
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) EqualMasked(y Int32x16, mask Mask32x16) Mask32x16
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) EqualMasked(y Int64x2, mask Mask64x2) Mask64x2
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) EqualMasked(y Int64x4, mask Mask64x4) Mask64x4
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) EqualMasked(y Int64x8, mask Mask64x8) Mask64x8
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) EqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) EqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) EqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) EqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) EqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) EqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) EqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) EqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) EqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) EqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) EqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
-
-// EqualMasked compares for equality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) EqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
-
 /* Expand */
 
 // Expand performs an expansion on a vector x whose elements are packed to lower parts.
@@ -3120,162 +1739,50 @@ func (x Float64x4) FloorScaled(prec uint8) Float64x4
 // Asm: VRNDSCALEPD, CPU Feature: AVX512
 func (x Float64x8) FloorScaled(prec uint8) Float64x8
 
-/* FloorScaledMasked */
+/* FloorScaledResidue */
 
-// FloorScaledMasked rounds elements down with specified precision.
-//
-// This operation is applied selectively under a write mask.
+// FloorScaledResidue computes the difference after flooring with specified precision.
 //
 // prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x4) FloorScaledMasked(prec uint8, mask Mask32x4) Float32x4
+// Asm: VREDUCEPS, CPU Feature: AVX512
+func (x Float32x4) FloorScaledResidue(prec uint8) Float32x4
 
-// FloorScaledMasked rounds elements down with specified precision.
-//
-// This operation is applied selectively under a write mask.
+// FloorScaledResidue computes the difference after flooring with specified precision.
 //
 // prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x8) FloorScaledMasked(prec uint8, mask Mask32x8) Float32x8
+// Asm: VREDUCEPS, CPU Feature: AVX512
+func (x Float32x8) FloorScaledResidue(prec uint8) Float32x8
 
-// FloorScaledMasked rounds elements down with specified precision.
-//
-// This operation is applied selectively under a write mask.
+// FloorScaledResidue computes the difference after flooring with specified precision.
 //
 // prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x16) FloorScaledMasked(prec uint8, mask Mask32x16) Float32x16
+// Asm: VREDUCEPS, CPU Feature: AVX512
+func (x Float32x16) FloorScaledResidue(prec uint8) Float32x16
 
-// FloorScaledMasked rounds elements down with specified precision.
-//
-// This operation is applied selectively under a write mask.
+// FloorScaledResidue computes the difference after flooring with specified precision.
 //
 // prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x2) FloorScaledMasked(prec uint8, mask Mask64x2) Float64x2
+// Asm: VREDUCEPD, CPU Feature: AVX512
+func (x Float64x2) FloorScaledResidue(prec uint8) Float64x2
 
-// FloorScaledMasked rounds elements down with specified precision.
-//
-// This operation is applied selectively under a write mask.
+// FloorScaledResidue computes the difference after flooring with specified precision.
 //
 // prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x4) FloorScaledMasked(prec uint8, mask Mask64x4) Float64x4
+// Asm: VREDUCEPD, CPU Feature: AVX512
+func (x Float64x4) FloorScaledResidue(prec uint8) Float64x4
 
-// FloorScaledMasked rounds elements down with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x8) FloorScaledMasked(prec uint8, mask Mask64x8) Float64x8
-
-/* FloorScaledResidue */
-
-// FloorScaledResidue computes the difference after flooring with specified precision.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x4) FloorScaledResidue(prec uint8) Float32x4
-
-// FloorScaledResidue computes the difference after flooring with specified precision.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x8) FloorScaledResidue(prec uint8) Float32x8
-
-// FloorScaledResidue computes the difference after flooring with specified precision.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x16) FloorScaledResidue(prec uint8) Float32x16
-
-// FloorScaledResidue computes the difference after flooring with specified precision.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x2) FloorScaledResidue(prec uint8) Float64x2
-
-// FloorScaledResidue computes the difference after flooring with specified precision.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x4) FloorScaledResidue(prec uint8) Float64x4
-
-// FloorScaledResidue computes the difference after flooring with specified precision.
+// FloorScaledResidue computes the difference after flooring with specified precision.
 //
 // prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
 // Asm: VREDUCEPD, CPU Feature: AVX512
 func (x Float64x8) FloorScaledResidue(prec uint8) Float64x8
 
-/* FloorScaledResidueMasked */
-
-// FloorScaledResidueMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x4) FloorScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4
-
-// FloorScaledResidueMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x8) FloorScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8
-
-// FloorScaledResidueMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x16) FloorScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16
-
-// FloorScaledResidueMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x2) FloorScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2
-
-// FloorScaledResidueMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x4) FloorScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4
-
-// FloorScaledResidueMasked computes the difference after flooring with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x8) FloorScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8
-
 /* GaloisFieldAffineTransform */
 
 // GaloisFieldAffineTransform computes an affine transformation in GF(2^8):
@@ -3343,85 +1850,6 @@ func (x Uint8x32) GaloisFieldAffineTransformInverse(y Uint64x4, b uint8) Uint8x3
 // Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
 func (x Uint8x64) GaloisFieldAffineTransformInverse(y Uint64x8, b uint8) Uint8x64
 
-/* GaloisFieldAffineTransformInverseMasked */
-
-// GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8),
-// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1:
-// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
-// corresponding to a group of 8 elements in x.
-//
-// This operation is applied selectively under a write mask.
-//
-// b results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
-func (x Uint8x16) GaloisFieldAffineTransformInverseMasked(y Uint64x2, b uint8, mask Mask8x16) Uint8x16
-
-// GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8),
-// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1:
-// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
-// corresponding to a group of 8 elements in x.
-//
-// This operation is applied selectively under a write mask.
-//
-// b results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
-func (x Uint8x32) GaloisFieldAffineTransformInverseMasked(y Uint64x4, b uint8, mask Mask8x32) Uint8x32
-
-// GaloisFieldAffineTransformInverseMasked computes an affine transformation in GF(2^8),
-// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1:
-// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
-// corresponding to a group of 8 elements in x.
-//
-// This operation is applied selectively under a write mask.
-//
-// b results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI
-func (x Uint8x64) GaloisFieldAffineTransformInverseMasked(y Uint64x8, b uint8, mask Mask8x64) Uint8x64
-
-/* GaloisFieldAffineTransformMasked */
-
-// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8):
-// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
-// corresponding to a group of 8 elements in x.
-//
-// This operation is applied selectively under a write mask.
-//
-// b results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
-func (x Uint8x16) GaloisFieldAffineTransformMasked(y Uint64x2, b uint8, mask Mask8x16) Uint8x16
-
-// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8):
-// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
-// corresponding to a group of 8 elements in x.
-//
-// This operation is applied selectively under a write mask.
-//
-// b results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
-func (x Uint8x32) GaloisFieldAffineTransformMasked(y Uint64x4, b uint8, mask Mask8x32) Uint8x32
-
-// GaloisFieldAffineTransformMasked computes an affine transformation in GF(2^8):
-// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes;
-// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y
-// corresponding to a group of 8 elements in x.
-//
-// This operation is applied selectively under a write mask.
-//
-// b results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI
-func (x Uint8x64) GaloisFieldAffineTransformMasked(y Uint64x8, b uint8, mask Mask8x64) Uint8x64
-
 /* GaloisFieldMul */
 
 // GaloisFieldMul computes element-wise GF(2^8) multiplication with
@@ -3442,32 +1870,6 @@ func (x Uint8x32) GaloisFieldMul(y Uint8x32) Uint8x32
 // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
 func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64
 
-/* GaloisFieldMulMasked */
-
-// GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
-// reduction polynomial x^8 + x^4 + x^3 + x + 1.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
-func (x Uint8x16) GaloisFieldMulMasked(y Uint8x16, mask Mask8x16) Uint8x16
-
-// GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
-// reduction polynomial x^8 + x^4 + x^3 + x + 1.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
-func (x Uint8x32) GaloisFieldMulMasked(y Uint8x32, mask Mask8x32) Uint8x32
-
-// GaloisFieldMulMasked computes element-wise GF(2^8) multiplication with
-// reduction polynomial x^8 + x^4 + x^3 + x + 1.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
-func (x Uint8x64) GaloisFieldMulMasked(y Uint8x64, mask Mask8x64) Uint8x64
-
 /* GetElem */
 
 // GetElem retrieves a single constant-indexed element's value.
@@ -3928,4139 +2330,1489 @@ func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16
 // Asm: VPCMPUQ, CPU Feature: AVX512
 func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8
 
-/* GreaterEqualMasked */
+/* IsNan */
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// IsNan checks if elements are NaN. Use as x.IsNan(x).
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x4) GreaterEqualMasked(y Float32x4, mask Mask32x4) Mask32x4
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) IsNan(y Float32x4) Mask32x4
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// IsNan checks if elements are NaN. Use as x.IsNan(x).
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x8) GreaterEqualMasked(y Float32x8, mask Mask32x8) Mask32x8
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) IsNan(y Float32x8) Mask32x8
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// IsNan checks if elements are NaN. Use as x.IsNan(x).
 //
 // Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) GreaterEqualMasked(y Float32x16, mask Mask32x16) Mask32x16
+func (x Float32x16) IsNan(y Float32x16) Mask32x16
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// IsNan checks if elements are NaN. Use as x.IsNan(x).
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x2) GreaterEqualMasked(y Float64x2, mask Mask64x2) Mask64x2
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) IsNan(y Float64x2) Mask64x2
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// IsNan checks if elements are NaN. Use as x.IsNan(x).
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x4) GreaterEqualMasked(y Float64x4, mask Mask64x4) Mask64x4
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) IsNan(y Float64x4) Mask64x4
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// IsNan checks if elements are NaN. Use as x.IsNan(x).
 //
 // Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) GreaterEqualMasked(y Float64x8, mask Mask64x8) Mask64x8
+func (x Float64x8) IsNan(y Float64x8) Mask64x8
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) GreaterEqualMasked(y Int8x16, mask Mask8x16) Mask8x16
+/* Less */
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Less compares for less than.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) GreaterEqualMasked(y Int8x32, mask Mask8x32) Mask8x32
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) Less(y Float32x4) Mask32x4
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Less compares for less than.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) GreaterEqualMasked(y Int8x64, mask Mask8x64) Mask8x64
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) Less(y Float32x8) Mask32x8
 
-// GreaterEqualMasked compares for greater than or equal.
+// Less compares for less than.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VCMPPS, CPU Feature: AVX512
+func (x Float32x16) Less(y Float32x16) Mask32x16
+
+// Less compares for less than.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) GreaterEqualMasked(y Int16x8, mask Mask16x8) Mask16x8
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) Less(y Float64x2) Mask64x2
 
-// GreaterEqualMasked compares for greater than or equal.
+// Less compares for less than.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) Less(y Float64x4) Mask64x4
+
+// Less compares for less than.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) GreaterEqualMasked(y Int16x16, mask Mask16x16) Mask16x16
+// Asm: VCMPPD, CPU Feature: AVX512
+func (x Float64x8) Less(y Float64x8) Mask64x8
 
-// GreaterEqualMasked compares for greater than or equal.
+// Less compares for less than.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPCMPB, CPU Feature: AVX512
+func (x Int8x64) Less(y Int8x64) Mask8x64
+
+// Less compares for less than.
 //
 // Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) GreaterEqualMasked(y Int16x32, mask Mask16x32) Mask16x32
+func (x Int16x32) Less(y Int16x32) Mask16x32
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Less compares for less than.
 //
 // Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) GreaterEqualMasked(y Int32x4, mask Mask32x4) Mask32x4
+func (x Int32x16) Less(y Int32x16) Mask32x16
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Less compares for less than.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) GreaterEqualMasked(y Int32x8, mask Mask32x8) Mask32x8
+// Asm: VPCMPQ, CPU Feature: AVX512
+func (x Int64x8) Less(y Int64x8) Mask64x8
 
-// GreaterEqualMasked compares for greater than or equal.
+// Less compares for less than.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) GreaterEqualMasked(y Int32x16, mask Mask32x16) Mask32x16
+// Asm: VPCMPUB, CPU Feature: AVX512
+func (x Uint8x64) Less(y Uint8x64) Mask8x64
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Less compares for less than.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) GreaterEqualMasked(y Int64x2, mask Mask64x2) Mask64x2
+// Asm: VPCMPUW, CPU Feature: AVX512
+func (x Uint16x32) Less(y Uint16x32) Mask16x32
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Less compares for less than.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) GreaterEqualMasked(y Int64x4, mask Mask64x4) Mask64x4
+// Asm: VPCMPUD, CPU Feature: AVX512
+func (x Uint32x16) Less(y Uint32x16) Mask32x16
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Less compares for less than.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) GreaterEqualMasked(y Int64x8, mask Mask64x8) Mask64x8
+// Asm: VPCMPUQ, CPU Feature: AVX512
+func (x Uint64x8) Less(y Uint64x8) Mask64x8
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) GreaterEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
+/* LessEqual */
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) GreaterEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) LessEqual(y Float32x4) Mask32x4
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) GreaterEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) LessEqual(y Float32x8) Mask32x8
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) GreaterEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
+// Asm: VCMPPS, CPU Feature: AVX512
+func (x Float32x16) LessEqual(y Float32x16) Mask32x16
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) GreaterEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) LessEqual(y Float64x2) Mask64x2
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) GreaterEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) LessEqual(y Float64x4) Mask64x4
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) GreaterEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
+// Asm: VCMPPD, CPU Feature: AVX512
+func (x Float64x8) LessEqual(y Float64x8) Mask64x8
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) GreaterEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
+// Asm: VPCMPB, CPU Feature: AVX512
+func (x Int8x64) LessEqual(y Int8x64) Mask8x64
 
-// GreaterEqualMasked compares for greater than or equal.
-//
-// This operation is applied selectively under a write mask.
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) GreaterEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
+// Asm: VPCMPW, CPU Feature: AVX512
+func (x Int16x32) LessEqual(y Int16x32) Mask16x32
 
-// GreaterEqualMasked compares for greater than or equal.
+// LessEqual compares for less than or equal.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPCMPD, CPU Feature: AVX512
+func (x Int32x16) LessEqual(y Int32x16) Mask32x16
+
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) GreaterEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
+// Asm: VPCMPQ, CPU Feature: AVX512
+func (x Int64x8) LessEqual(y Int64x8) Mask64x8
 
-// GreaterEqualMasked compares for greater than or equal.
+// LessEqual compares for less than or equal.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPCMPUB, CPU Feature: AVX512
+func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64
+
+// LessEqual compares for less than or equal.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) GreaterEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
+// Asm: VPCMPUW, CPU Feature: AVX512
+func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32
 
-// GreaterEqualMasked compares for greater than or equal.
+// LessEqual compares for less than or equal.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPCMPUD, CPU Feature: AVX512
+func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16
+
+// LessEqual compares for less than or equal.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) GreaterEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
+func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8
 
-/* GreaterMasked */
+/* Max */
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x4) GreaterMasked(y Float32x4, mask Mask32x4) Mask32x4
+// Asm: VMAXPS, CPU Feature: AVX
+func (x Float32x4) Max(y Float32x4) Float32x4
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x8) GreaterMasked(y Float32x8, mask Mask32x8) Mask32x8
+// Asm: VMAXPS, CPU Feature: AVX
+func (x Float32x8) Max(y Float32x8) Float32x8
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) GreaterMasked(y Float32x16, mask Mask32x16) Mask32x16
+// Asm: VMAXPS, CPU Feature: AVX512
+func (x Float32x16) Max(y Float32x16) Float32x16
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x2) GreaterMasked(y Float64x2, mask Mask64x2) Mask64x2
+// Asm: VMAXPD, CPU Feature: AVX
+func (x Float64x2) Max(y Float64x2) Float64x2
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x4) GreaterMasked(y Float64x4, mask Mask64x4) Mask64x4
+// Asm: VMAXPD, CPU Feature: AVX
+func (x Float64x4) Max(y Float64x4) Float64x4
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) GreaterMasked(y Float64x8, mask Mask64x8) Mask64x8
+// Asm: VMAXPD, CPU Feature: AVX512
+func (x Float64x8) Max(y Float64x8) Float64x8
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) GreaterMasked(y Int8x16, mask Mask8x16) Mask8x16
+// Asm: VPMAXSB, CPU Feature: AVX
+func (x Int8x16) Max(y Int8x16) Int8x16
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) GreaterMasked(y Int8x32, mask Mask8x32) Mask8x32
+// Asm: VPMAXSB, CPU Feature: AVX2
+func (x Int8x32) Max(y Int8x32) Int8x32
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) GreaterMasked(y Int8x64, mask Mask8x64) Mask8x64
+// Asm: VPMAXSB, CPU Feature: AVX512
+func (x Int8x64) Max(y Int8x64) Int8x64
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) GreaterMasked(y Int16x8, mask Mask16x8) Mask16x8
+// Asm: VPMAXSW, CPU Feature: AVX
+func (x Int16x8) Max(y Int16x8) Int16x8
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) GreaterMasked(y Int16x16, mask Mask16x16) Mask16x16
+// Asm: VPMAXSW, CPU Feature: AVX2
+func (x Int16x16) Max(y Int16x16) Int16x16
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) GreaterMasked(y Int16x32, mask Mask16x32) Mask16x32
+// Asm: VPMAXSW, CPU Feature: AVX512
+func (x Int16x32) Max(y Int16x32) Int16x32
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) GreaterMasked(y Int32x4, mask Mask32x4) Mask32x4
+// Asm: VPMAXSD, CPU Feature: AVX
+func (x Int32x4) Max(y Int32x4) Int32x4
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) GreaterMasked(y Int32x8, mask Mask32x8) Mask32x8
+// Asm: VPMAXSD, CPU Feature: AVX2
+func (x Int32x8) Max(y Int32x8) Int32x8
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) GreaterMasked(y Int32x16, mask Mask32x16) Mask32x16
+// Asm: VPMAXSD, CPU Feature: AVX512
+func (x Int32x16) Max(y Int32x16) Int32x16
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) GreaterMasked(y Int64x2, mask Mask64x2) Mask64x2
+// Asm: VPMAXSQ, CPU Feature: AVX512
+func (x Int64x2) Max(y Int64x2) Int64x2
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) GreaterMasked(y Int64x4, mask Mask64x4) Mask64x4
+// Asm: VPMAXSQ, CPU Feature: AVX512
+func (x Int64x4) Max(y Int64x4) Int64x4
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) GreaterMasked(y Int64x8, mask Mask64x8) Mask64x8
+// Asm: VPMAXSQ, CPU Feature: AVX512
+func (x Int64x8) Max(y Int64x8) Int64x8
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) GreaterMasked(y Uint8x16, mask Mask8x16) Mask8x16
+// Asm: VPMAXUB, CPU Feature: AVX
+func (x Uint8x16) Max(y Uint8x16) Uint8x16
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) GreaterMasked(y Uint8x32, mask Mask8x32) Mask8x32
+// Asm: VPMAXUB, CPU Feature: AVX2
+func (x Uint8x32) Max(y Uint8x32) Uint8x32
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) GreaterMasked(y Uint8x64, mask Mask8x64) Mask8x64
+// Asm: VPMAXUB, CPU Feature: AVX512
+func (x Uint8x64) Max(y Uint8x64) Uint8x64
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) GreaterMasked(y Uint16x8, mask Mask16x8) Mask16x8
+// Asm: VPMAXUW, CPU Feature: AVX
+func (x Uint16x8) Max(y Uint16x8) Uint16x8
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) GreaterMasked(y Uint16x16, mask Mask16x16) Mask16x16
+// Asm: VPMAXUW, CPU Feature: AVX2
+func (x Uint16x16) Max(y Uint16x16) Uint16x16
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) GreaterMasked(y Uint16x32, mask Mask16x32) Mask16x32
+// Asm: VPMAXUW, CPU Feature: AVX512
+func (x Uint16x32) Max(y Uint16x32) Uint16x32
 
-// GreaterMasked compares for greater than.
+// Max computes the maximum of corresponding elements.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMAXUD, CPU Feature: AVX
+func (x Uint32x4) Max(y Uint32x4) Uint32x4
+
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) GreaterMasked(y Uint32x4, mask Mask32x4) Mask32x4
+// Asm: VPMAXUD, CPU Feature: AVX2
+func (x Uint32x8) Max(y Uint32x8) Uint32x8
 
-// GreaterMasked compares for greater than.
+// Max computes the maximum of corresponding elements.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) GreaterMasked(y Uint32x8, mask Mask32x8) Mask32x8
-
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) GreaterMasked(y Uint32x16, mask Mask32x16) Mask32x16
+// Asm: VPMAXUD, CPU Feature: AVX512
+func (x Uint32x16) Max(y Uint32x16) Uint32x16
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) GreaterMasked(y Uint64x2, mask Mask64x2) Mask64x2
+// Asm: VPMAXUQ, CPU Feature: AVX512
+func (x Uint64x2) Max(y Uint64x2) Uint64x2
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) GreaterMasked(y Uint64x4, mask Mask64x4) Mask64x4
+// Asm: VPMAXUQ, CPU Feature: AVX512
+func (x Uint64x4) Max(y Uint64x4) Uint64x4
 
-// GreaterMasked compares for greater than.
-//
-// This operation is applied selectively under a write mask.
+// Max computes the maximum of corresponding elements.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) GreaterMasked(y Uint64x8, mask Mask64x8) Mask64x8
+// Asm: VPMAXUQ, CPU Feature: AVX512
+func (x Uint64x8) Max(y Uint64x8) Uint64x8
 
-/* IsNan */
+/* Min */
 
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) IsNan(y Float32x4) Mask32x4
+// Asm: VMINPS, CPU Feature: AVX
+func (x Float32x4) Min(y Float32x4) Float32x4
 
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) IsNan(y Float32x8) Mask32x8
+// Asm: VMINPS, CPU Feature: AVX
+func (x Float32x8) Min(y Float32x8) Float32x8
 
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) IsNan(y Float32x16) Mask32x16
+// Asm: VMINPS, CPU Feature: AVX512
+func (x Float32x16) Min(y Float32x16) Float32x16
 
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) IsNan(y Float64x2) Mask64x2
+// Asm: VMINPD, CPU Feature: AVX
+func (x Float64x2) Min(y Float64x2) Float64x2
 
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) IsNan(y Float64x4) Mask64x4
+// Asm: VMINPD, CPU Feature: AVX
+func (x Float64x4) Min(y Float64x4) Float64x4
 
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) IsNan(y Float64x8) Mask64x8
-
-/* IsNanMasked */
+// Asm: VMINPD, CPU Feature: AVX512
+func (x Float64x8) Min(y Float64x8) Float64x8
 
-// IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
-//
-// This operation is applied selectively under a write mask.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x4) IsNanMasked(y Float32x4, mask Mask32x4) Mask32x4
+// Asm: VPMINSB, CPU Feature: AVX
+func (x Int8x16) Min(y Int8x16) Int8x16
 
-// IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
-//
-// This operation is applied selectively under a write mask.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x8) IsNanMasked(y Float32x8, mask Mask32x8) Mask32x8
+// Asm: VPMINSB, CPU Feature: AVX2
+func (x Int8x32) Min(y Int8x32) Int8x32
 
-// IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
-//
-// This operation is applied selectively under a write mask.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) IsNanMasked(y Float32x16, mask Mask32x16) Mask32x16
+// Asm: VPMINSB, CPU Feature: AVX512
+func (x Int8x64) Min(y Int8x64) Int8x64
 
-// IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMINSW, CPU Feature: AVX
+func (x Int16x8) Min(y Int16x8) Int16x8
+
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x2) IsNanMasked(y Float64x2, mask Mask64x2) Mask64x2
+// Asm: VPMINSW, CPU Feature: AVX2
+func (x Int16x16) Min(y Int16x16) Int16x16
 
-// IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMINSW, CPU Feature: AVX512
+func (x Int16x32) Min(y Int16x32) Int16x32
+
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x4) IsNanMasked(y Float64x4, mask Mask64x4) Mask64x4
+// Asm: VPMINSD, CPU Feature: AVX
+func (x Int32x4) Min(y Int32x4) Int32x4
 
-// IsNanMasked checks if elements are NaN. Use as x.IsNan(x).
+// Min computes the minimum of corresponding elements.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMINSD, CPU Feature: AVX2
+func (x Int32x8) Min(y Int32x8) Int32x8
+
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) IsNanMasked(y Float64x8, mask Mask64x8) Mask64x8
+// Asm: VPMINSD, CPU Feature: AVX512
+func (x Int32x16) Min(y Int32x16) Int32x16
 
-/* Less */
+// Min computes the minimum of corresponding elements.
+//
+// Asm: VPMINSQ, CPU Feature: AVX512
+func (x Int64x2) Min(y Int64x2) Int64x2
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) Less(y Float32x4) Mask32x4
+// Asm: VPMINSQ, CPU Feature: AVX512
+func (x Int64x4) Min(y Int64x4) Int64x4
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) Less(y Float32x8) Mask32x8
+// Asm: VPMINSQ, CPU Feature: AVX512
+func (x Int64x8) Min(y Int64x8) Int64x8
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) Less(y Float32x16) Mask32x16
+// Asm: VPMINUB, CPU Feature: AVX
+func (x Uint8x16) Min(y Uint8x16) Uint8x16
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) Less(y Float64x2) Mask64x2
+// Asm: VPMINUB, CPU Feature: AVX2
+func (x Uint8x32) Min(y Uint8x32) Uint8x32
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) Less(y Float64x4) Mask64x4
+// Asm: VPMINUB, CPU Feature: AVX512
+func (x Uint8x64) Min(y Uint8x64) Uint8x64
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) Less(y Float64x8) Mask64x8
+// Asm: VPMINUW, CPU Feature: AVX
+func (x Uint16x8) Min(y Uint16x8) Uint16x8
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) Less(y Int8x64) Mask8x64
+// Asm: VPMINUW, CPU Feature: AVX2
+func (x Uint16x16) Min(y Uint16x16) Uint16x16
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) Less(y Int16x32) Mask16x32
+// Asm: VPMINUW, CPU Feature: AVX512
+func (x Uint16x32) Min(y Uint16x32) Uint16x32
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) Less(y Int32x16) Mask32x16
+// Asm: VPMINUD, CPU Feature: AVX
+func (x Uint32x4) Min(y Uint32x4) Uint32x4
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) Less(y Int64x8) Mask64x8
+// Asm: VPMINUD, CPU Feature: AVX2
+func (x Uint32x8) Min(y Uint32x8) Uint32x8
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) Less(y Uint8x64) Mask8x64
+// Asm: VPMINUD, CPU Feature: AVX512
+func (x Uint32x16) Min(y Uint32x16) Uint32x16
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) Less(y Uint16x32) Mask16x32
+// Asm: VPMINUQ, CPU Feature: AVX512
+func (x Uint64x2) Min(y Uint64x2) Uint64x2
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) Less(y Uint32x16) Mask32x16
+// Asm: VPMINUQ, CPU Feature: AVX512
+func (x Uint64x4) Min(y Uint64x4) Uint64x4
 
-// Less compares for less than.
+// Min computes the minimum of corresponding elements.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) Less(y Uint64x8) Mask64x8
+// Asm: VPMINUQ, CPU Feature: AVX512
+func (x Uint64x8) Min(y Uint64x8) Uint64x8
 
-/* LessEqual */
+/* Mul */
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) LessEqual(y Float32x4) Mask32x4
+// Asm: VMULPS, CPU Feature: AVX
+func (x Float32x4) Mul(y Float32x4) Float32x4
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) LessEqual(y Float32x8) Mask32x8
+// Asm: VMULPS, CPU Feature: AVX
+func (x Float32x8) Mul(y Float32x8) Float32x8
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) LessEqual(y Float32x16) Mask32x16
+// Asm: VMULPS, CPU Feature: AVX512
+func (x Float32x16) Mul(y Float32x16) Float32x16
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) LessEqual(y Float64x2) Mask64x2
+// Asm: VMULPD, CPU Feature: AVX
+func (x Float64x2) Mul(y Float64x2) Float64x2
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) LessEqual(y Float64x4) Mask64x4
+// Asm: VMULPD, CPU Feature: AVX
+func (x Float64x4) Mul(y Float64x4) Float64x4
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) LessEqual(y Float64x8) Mask64x8
+// Asm: VMULPD, CPU Feature: AVX512
+func (x Float64x8) Mul(y Float64x8) Float64x8
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) LessEqual(y Int8x64) Mask8x64
+// Asm: VPMULLW, CPU Feature: AVX
+func (x Int16x8) Mul(y Int16x8) Int16x8
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) LessEqual(y Int16x32) Mask16x32
+// Asm: VPMULLW, CPU Feature: AVX2
+func (x Int16x16) Mul(y Int16x16) Int16x16
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) LessEqual(y Int32x16) Mask32x16
+// Asm: VPMULLW, CPU Feature: AVX512
+func (x Int16x32) Mul(y Int16x32) Int16x32
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) LessEqual(y Int64x8) Mask64x8
+// Asm: VPMULLD, CPU Feature: AVX
+func (x Int32x4) Mul(y Int32x4) Int32x4
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64
+// Asm: VPMULLD, CPU Feature: AVX2
+func (x Int32x8) Mul(y Int32x8) Int32x8
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32
+// Asm: VPMULLD, CPU Feature: AVX512
+func (x Int32x16) Mul(y Int32x16) Int32x16
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16
+// Asm: VPMULLQ, CPU Feature: AVX512
+func (x Int64x2) Mul(y Int64x2) Int64x2
 
-// LessEqual compares for less than or equal.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8
-
-/* LessEqualMasked */
+// Asm: VPMULLQ, CPU Feature: AVX512
+func (x Int64x4) Mul(y Int64x4) Int64x4
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x4) LessEqualMasked(y Float32x4, mask Mask32x4) Mask32x4
+// Asm: VPMULLQ, CPU Feature: AVX512
+func (x Int64x8) Mul(y Int64x8) Int64x8
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x8) LessEqualMasked(y Float32x8, mask Mask32x8) Mask32x8
+// Asm: VPMULLW, CPU Feature: AVX
+func (x Uint16x8) Mul(y Uint16x8) Uint16x8
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) LessEqualMasked(y Float32x16, mask Mask32x16) Mask32x16
+// Asm: VPMULLW, CPU Feature: AVX2
+func (x Uint16x16) Mul(y Uint16x16) Uint16x16
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x2) LessEqualMasked(y Float64x2, mask Mask64x2) Mask64x2
+// Asm: VPMULLW, CPU Feature: AVX512
+func (x Uint16x32) Mul(y Uint16x32) Uint16x32
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x4) LessEqualMasked(y Float64x4, mask Mask64x4) Mask64x4
+// Asm: VPMULLD, CPU Feature: AVX
+func (x Uint32x4) Mul(y Uint32x4) Uint32x4
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) LessEqualMasked(y Float64x8, mask Mask64x8) Mask64x8
+// Asm: VPMULLD, CPU Feature: AVX2
+func (x Uint32x8) Mul(y Uint32x8) Uint32x8
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) LessEqualMasked(y Int8x16, mask Mask8x16) Mask8x16
+// Asm: VPMULLD, CPU Feature: AVX512
+func (x Uint32x16) Mul(y Uint32x16) Uint32x16
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) LessEqualMasked(y Int8x32, mask Mask8x32) Mask8x32
+// Asm: VPMULLQ, CPU Feature: AVX512
+func (x Uint64x2) Mul(y Uint64x2) Uint64x2
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) LessEqualMasked(y Int8x64, mask Mask8x64) Mask8x64
+// Asm: VPMULLQ, CPU Feature: AVX512
+func (x Uint64x4) Mul(y Uint64x4) Uint64x4
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// Mul multiplies corresponding elements of two vectors.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) LessEqualMasked(y Int16x8, mask Mask16x8) Mask16x8
+// Asm: VPMULLQ, CPU Feature: AVX512
+func (x Uint64x8) Mul(y Uint64x8) Uint64x8
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) LessEqualMasked(y Int16x16, mask Mask16x16) Mask16x16
+/* MulAdd */
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAdd performs a fused (x * y) + z.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) LessEqualMasked(y Int16x32, mask Mask16x32) Mask16x32
+// Asm: VFMADD213PS, CPU Feature: AVX512
+func (x Float32x4) MulAdd(y Float32x4, z Float32x4) Float32x4
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAdd performs a fused (x * y) + z.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) LessEqualMasked(y Int32x4, mask Mask32x4) Mask32x4
+// Asm: VFMADD213PS, CPU Feature: AVX512
+func (x Float32x8) MulAdd(y Float32x8, z Float32x8) Float32x8
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAdd performs a fused (x * y) + z.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) LessEqualMasked(y Int32x8, mask Mask32x8) Mask32x8
+// Asm: VFMADD213PS, CPU Feature: AVX512
+func (x Float32x16) MulAdd(y Float32x16, z Float32x16) Float32x16
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAdd performs a fused (x * y) + z.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) LessEqualMasked(y Int32x16, mask Mask32x16) Mask32x16
+// Asm: VFMADD213PD, CPU Feature: AVX512
+func (x Float64x2) MulAdd(y Float64x2, z Float64x2) Float64x2
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAdd performs a fused (x * y) + z.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) LessEqualMasked(y Int64x2, mask Mask64x2) Mask64x2
+// Asm: VFMADD213PD, CPU Feature: AVX512
+func (x Float64x4) MulAdd(y Float64x4, z Float64x4) Float64x4
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAdd performs a fused (x * y) + z.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) LessEqualMasked(y Int64x4, mask Mask64x4) Mask64x4
+// Asm: VFMADD213PD, CPU Feature: AVX512
+func (x Float64x8) MulAdd(y Float64x8, z Float64x8) Float64x8
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) LessEqualMasked(y Int64x8, mask Mask64x8) Mask64x8
+/* MulAddSub */
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) LessEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512
+func (x Float32x4) MulAddSub(y Float32x4, z Float32x4) Float32x4
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) LessEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512
+func (x Float32x8) MulAddSub(y Float32x8, z Float32x8) Float32x8
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) LessEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
+// Asm: VFMADDSUB213PS, CPU Feature: AVX512
+func (x Float32x16) MulAddSub(y Float32x16, z Float32x16) Float32x16
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) LessEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512
+func (x Float64x2) MulAddSub(y Float64x2, z Float64x2) Float64x2
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) LessEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512
+func (x Float64x4) MulAddSub(y Float64x4, z Float64x4) Float64x4
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
+// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) LessEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
+// Asm: VFMADDSUB213PD, CPU Feature: AVX512
+func (x Float64x8) MulAddSub(y Float64x8, z Float64x8) Float64x8
 
-// LessEqualMasked compares for less than or equal.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) LessEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
+/* MulEvenWiden */
 
-// LessEqualMasked compares for less than or equal.
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMULDQ, CPU Feature: AVX
+func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) LessEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
+// Asm: VPMULDQ, CPU Feature: AVX2
+func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4
 
-// LessEqualMasked compares for less than or equal.
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMULUDQ, CPU Feature: AVX
+func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2
+
+// MulEvenWiden multiplies even-indexed elements, widening the result.
+// Result[i] = v1.Even[i] * v2.Even[i].
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) LessEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
+// Asm: VPMULUDQ, CPU Feature: AVX2
+func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4
 
-// LessEqualMasked compares for less than or equal.
+/* MulHigh */
+
+// MulHigh multiplies elements and stores the high part of the result.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMULHW, CPU Feature: AVX
+func (x Int16x8) MulHigh(y Int16x8) Int16x8
+
+// MulHigh multiplies elements and stores the high part of the result.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) LessEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
+// Asm: VPMULHW, CPU Feature: AVX2
+func (x Int16x16) MulHigh(y Int16x16) Int16x16
 
-// LessEqualMasked compares for less than or equal.
+// MulHigh multiplies elements and stores the high part of the result.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMULHW, CPU Feature: AVX512
+func (x Int16x32) MulHigh(y Int16x32) Int16x32
+
+// MulHigh multiplies elements and stores the high part of the result.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) LessEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
+// Asm: VPMULHUW, CPU Feature: AVX
+func (x Uint16x8) MulHigh(y Uint16x8) Uint16x8
 
-// LessEqualMasked compares for less than or equal.
+// MulHigh multiplies elements and stores the high part of the result.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPMULHUW, CPU Feature: AVX2
+func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16
+
+// MulHigh multiplies elements and stores the high part of the result.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) LessEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
+// Asm: VPMULHUW, CPU Feature: AVX512
+func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32
 
-/* LessMasked */
+/* MulSubAdd */
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x4) LessMasked(y Float32x4, mask Mask32x4) Mask32x4
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512
+func (x Float32x4) MulSubAdd(y Float32x4, z Float32x4) Float32x4
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x8) LessMasked(y Float32x8, mask Mask32x8) Mask32x8
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512
+func (x Float32x8) MulSubAdd(y Float32x8, z Float32x8) Float32x8
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
 //
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) LessMasked(y Float32x16, mask Mask32x16) Mask32x16
+// Asm: VFMSUBADD213PS, CPU Feature: AVX512
+func (x Float32x16) MulSubAdd(y Float32x16, z Float32x16) Float32x16
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x2) LessMasked(y Float64x2, mask Mask64x2) Mask64x2
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512
+func (x Float64x2) MulSubAdd(y Float64x2, z Float64x2) Float64x2
 
-// LessMasked compares for less than.
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x4) LessMasked(y Float64x4, mask Mask64x4) Mask64x4
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512
+func (x Float64x4) MulSubAdd(y Float64x4, z Float64x4) Float64x4
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
 //
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) LessMasked(y Float64x8, mask Mask64x8) Mask64x8
+// Asm: VFMSUBADD213PD, CPU Feature: AVX512
+func (x Float64x8) MulSubAdd(y Float64x8, z Float64x8) Float64x8
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) LessMasked(y Int8x16, mask Mask8x16) Mask8x16
+/* NotEqual */
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) LessMasked(y Int8x32, mask Mask8x32) Mask8x32
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) NotEqual(y Float32x4) Mask32x4
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) LessMasked(y Int8x64, mask Mask8x64) Mask8x64
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) NotEqual(y Float32x8) Mask32x8
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) LessMasked(y Int16x8, mask Mask16x8) Mask16x8
+// Asm: VCMPPS, CPU Feature: AVX512
+func (x Float32x16) NotEqual(y Float32x16) Mask32x16
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) LessMasked(y Int16x16, mask Mask16x16) Mask16x16
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) NotEqual(y Float64x2) Mask64x2
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) LessMasked(y Int16x32, mask Mask16x32) Mask16x32
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) NotEqual(y Float64x4) Mask64x4
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) LessMasked(y Int32x4, mask Mask32x4) Mask32x4
+// Asm: VCMPPD, CPU Feature: AVX512
+func (x Float64x8) NotEqual(y Float64x8) Mask64x8
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) LessMasked(y Int32x8, mask Mask32x8) Mask32x8
+// Asm: VPCMPB, CPU Feature: AVX512
+func (x Int8x64) NotEqual(y Int8x64) Mask8x64
 
-// LessMasked compares for less than.
+// NotEqual compares for inequality.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPCMPW, CPU Feature: AVX512
+func (x Int16x32) NotEqual(y Int16x32) Mask16x32
+
+// NotEqual compares for inequality.
 //
 // Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) LessMasked(y Int32x16, mask Mask32x16) Mask32x16
+func (x Int32x16) NotEqual(y Int32x16) Mask32x16
 
-// LessMasked compares for less than.
-//
-// This operation is applied selectively under a write mask.
+// NotEqual compares for inequality.
 //
 // Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) LessMasked(y Int64x2, mask Mask64x2) Mask64x2
+func (x Int64x8) NotEqual(y Int64x8) Mask64x8
 
-// LessMasked compares for less than.
+// NotEqual compares for inequality.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPCMPUB, CPU Feature: AVX512
+func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64
+
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) LessMasked(y Int64x4, mask Mask64x4) Mask64x4
+// Asm: VPCMPUW, CPU Feature: AVX512
+func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32
 
-// LessMasked compares for less than.
+// NotEqual compares for inequality.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPCMPUD, CPU Feature: AVX512
+func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16
+
+// NotEqual compares for inequality.
 //
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) LessMasked(y Int64x8, mask Mask64x8) Mask64x8
+// Asm: VPCMPUQ, CPU Feature: AVX512
+func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8
+
+/* OnesCount */
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Int8x16) OnesCount() Int8x16
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) LessMasked(y Uint8x16, mask Mask8x16) Mask8x16
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Int8x32) OnesCount() Int8x32
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Int8x64) OnesCount() Int8x64
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) LessMasked(y Uint8x32, mask Mask8x32) Mask8x32
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Int16x8) OnesCount() Int16x8
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Int16x16) OnesCount() Int16x16
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) LessMasked(y Uint8x64, mask Mask8x64) Mask8x64
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Int16x32) OnesCount() Int16x32
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Int32x4) OnesCount() Int32x4
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) LessMasked(y Uint16x8, mask Mask16x8) Mask16x8
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Int32x8) OnesCount() Int32x8
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Int32x16) OnesCount() Int32x16
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) LessMasked(y Uint16x16, mask Mask16x16) Mask16x16
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Int64x2) OnesCount() Int64x2
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Int64x4) OnesCount() Int64x4
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) LessMasked(y Uint16x32, mask Mask16x32) Mask16x32
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Int64x8) OnesCount() Int64x8
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Uint8x16) OnesCount() Uint8x16
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) LessMasked(y Uint32x4, mask Mask32x4) Mask32x4
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Uint8x32) OnesCount() Uint8x32
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
+func (x Uint8x64) OnesCount() Uint8x64
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) LessMasked(y Uint32x8, mask Mask32x8) Mask32x8
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Uint16x8) OnesCount() Uint16x8
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Uint16x16) OnesCount() Uint16x16
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) LessMasked(y Uint32x16, mask Mask32x16) Mask32x16
+// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
+func (x Uint16x32) OnesCount() Uint16x32
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint32x4) OnesCount() Uint32x4
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) LessMasked(y Uint64x2, mask Mask64x2) Mask64x2
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint32x8) OnesCount() Uint32x8
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint32x16) OnesCount() Uint32x16
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) LessMasked(y Uint64x4, mask Mask64x4) Mask64x4
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint64x2) OnesCount() Uint64x2
 
-// LessMasked compares for less than.
+// OnesCount counts the number of set bits in each element.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint64x4) OnesCount() Uint64x4
+
+// OnesCount counts the number of set bits in each element.
 //
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) LessMasked(y Uint64x8, mask Mask64x8) Mask64x8
+// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
+func (x Uint64x8) OnesCount() Uint64x8
 
-/* Max */
+/* Or */
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VMAXPS, CPU Feature: AVX
-func (x Float32x4) Max(y Float32x4) Float32x4
+// Asm: VPOR, CPU Feature: AVX
+func (x Int8x16) Or(y Int8x16) Int8x16
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VMAXPS, CPU Feature: AVX
-func (x Float32x8) Max(y Float32x8) Float32x8
+// Asm: VPOR, CPU Feature: AVX2
+func (x Int8x32) Or(y Int8x32) Int8x32
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VMAXPS, CPU Feature: AVX512
-func (x Float32x16) Max(y Float32x16) Float32x16
+// Asm: VPORD, CPU Feature: AVX512
+func (x Int8x64) Or(y Int8x64) Int8x64
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VMAXPD, CPU Feature: AVX
-func (x Float64x2) Max(y Float64x2) Float64x2
+// Asm: VPOR, CPU Feature: AVX
+func (x Int16x8) Or(y Int16x8) Int16x8
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VMAXPD, CPU Feature: AVX
-func (x Float64x4) Max(y Float64x4) Float64x4
+// Asm: VPOR, CPU Feature: AVX2
+func (x Int16x16) Or(y Int16x16) Int16x16
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VMAXPD, CPU Feature: AVX512
-func (x Float64x8) Max(y Float64x8) Float64x8
+// Asm: VPORD, CPU Feature: AVX512
+func (x Int16x32) Or(y Int16x32) Int16x32
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPMAXSB, CPU Feature: AVX
-func (x Int8x16) Max(y Int8x16) Int8x16
+// Asm: VPOR, CPU Feature: AVX
+func (x Int32x4) Or(y Int32x4) Int32x4
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPMAXSB, CPU Feature: AVX2
-func (x Int8x32) Max(y Int8x32) Int8x32
+// Asm: VPOR, CPU Feature: AVX2
+func (x Int32x8) Or(y Int32x8) Int32x8
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPMAXSB, CPU Feature: AVX512
-func (x Int8x64) Max(y Int8x64) Int8x64
+// Asm: VPORD, CPU Feature: AVX512
+func (x Int32x16) Or(y Int32x16) Int32x16
 
-// Max computes the maximum of corresponding elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPMAXSW, CPU Feature: AVX
-func (x Int16x8) Max(y Int16x8) Int16x8
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSW, CPU Feature: AVX2
-func (x Int16x16) Max(y Int16x16) Int16x16
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSW, CPU Feature: AVX512
-func (x Int16x32) Max(y Int16x32) Int16x32
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSD, CPU Feature: AVX
-func (x Int32x4) Max(y Int32x4) Int32x4
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSD, CPU Feature: AVX2
-func (x Int32x8) Max(y Int32x8) Int32x8
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSD, CPU Feature: AVX512
-func (x Int32x16) Max(y Int32x16) Int32x16
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSQ, CPU Feature: AVX512
-func (x Int64x2) Max(y Int64x2) Int64x2
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSQ, CPU Feature: AVX512
-func (x Int64x4) Max(y Int64x4) Int64x4
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXSQ, CPU Feature: AVX512
-func (x Int64x8) Max(y Int64x8) Int64x8
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUB, CPU Feature: AVX
-func (x Uint8x16) Max(y Uint8x16) Uint8x16
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUB, CPU Feature: AVX2
-func (x Uint8x32) Max(y Uint8x32) Uint8x32
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUB, CPU Feature: AVX512
-func (x Uint8x64) Max(y Uint8x64) Uint8x64
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUW, CPU Feature: AVX
-func (x Uint16x8) Max(y Uint16x8) Uint16x8
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUW, CPU Feature: AVX2
-func (x Uint16x16) Max(y Uint16x16) Uint16x16
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUW, CPU Feature: AVX512
-func (x Uint16x32) Max(y Uint16x32) Uint16x32
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUD, CPU Feature: AVX
-func (x Uint32x4) Max(y Uint32x4) Uint32x4
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUD, CPU Feature: AVX2
-func (x Uint32x8) Max(y Uint32x8) Uint32x8
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUD, CPU Feature: AVX512
-func (x Uint32x16) Max(y Uint32x16) Uint32x16
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUQ, CPU Feature: AVX512
-func (x Uint64x2) Max(y Uint64x2) Uint64x2
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUQ, CPU Feature: AVX512
-func (x Uint64x4) Max(y Uint64x4) Uint64x4
-
-// Max computes the maximum of corresponding elements.
-//
-// Asm: VPMAXUQ, CPU Feature: AVX512
-func (x Uint64x8) Max(y Uint64x8) Uint64x8
-
-/* MaxMasked */
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMAXPS, CPU Feature: AVX512
-func (x Float32x4) MaxMasked(y Float32x4, mask Mask32x4) Float32x4
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMAXPS, CPU Feature: AVX512
-func (x Float32x8) MaxMasked(y Float32x8, mask Mask32x8) Float32x8
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMAXPS, CPU Feature: AVX512
-func (x Float32x16) MaxMasked(y Float32x16, mask Mask32x16) Float32x16
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMAXPD, CPU Feature: AVX512
-func (x Float64x2) MaxMasked(y Float64x2, mask Mask64x2) Float64x2
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMAXPD, CPU Feature: AVX512
-func (x Float64x4) MaxMasked(y Float64x4, mask Mask64x4) Float64x4
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMAXPD, CPU Feature: AVX512
-func (x Float64x8) MaxMasked(y Float64x8, mask Mask64x8) Float64x8
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSB, CPU Feature: AVX512
-func (x Int8x16) MaxMasked(y Int8x16, mask Mask8x16) Int8x16
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSB, CPU Feature: AVX512
-func (x Int8x32) MaxMasked(y Int8x32, mask Mask8x32) Int8x32
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSB, CPU Feature: AVX512
-func (x Int8x64) MaxMasked(y Int8x64, mask Mask8x64) Int8x64
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSW, CPU Feature: AVX512
-func (x Int16x8) MaxMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSW, CPU Feature: AVX512
-func (x Int16x16) MaxMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSW, CPU Feature: AVX512
-func (x Int16x32) MaxMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSD, CPU Feature: AVX512
-func (x Int32x4) MaxMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSD, CPU Feature: AVX512
-func (x Int32x8) MaxMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSD, CPU Feature: AVX512
-func (x Int32x16) MaxMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSQ, CPU Feature: AVX512
-func (x Int64x2) MaxMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSQ, CPU Feature: AVX512
-func (x Int64x4) MaxMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXSQ, CPU Feature: AVX512
-func (x Int64x8) MaxMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUB, CPU Feature: AVX512
-func (x Uint8x16) MaxMasked(y Uint8x16, mask Mask8x16) Uint8x16
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUB, CPU Feature: AVX512
-func (x Uint8x32) MaxMasked(y Uint8x32, mask Mask8x32) Uint8x32
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUB, CPU Feature: AVX512
-func (x Uint8x64) MaxMasked(y Uint8x64, mask Mask8x64) Uint8x64
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUW, CPU Feature: AVX512
-func (x Uint16x8) MaxMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUW, CPU Feature: AVX512
-func (x Uint16x16) MaxMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUW, CPU Feature: AVX512
-func (x Uint16x32) MaxMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUD, CPU Feature: AVX512
-func (x Uint32x4) MaxMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUD, CPU Feature: AVX512
-func (x Uint32x8) MaxMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUD, CPU Feature: AVX512
-func (x Uint32x16) MaxMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUQ, CPU Feature: AVX512
-func (x Uint64x2) MaxMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUQ, CPU Feature: AVX512
-func (x Uint64x4) MaxMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// MaxMasked computes the maximum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMAXUQ, CPU Feature: AVX512
-func (x Uint64x8) MaxMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
-/* Min */
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VMINPS, CPU Feature: AVX
-func (x Float32x4) Min(y Float32x4) Float32x4
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VMINPS, CPU Feature: AVX
-func (x Float32x8) Min(y Float32x8) Float32x8
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VMINPS, CPU Feature: AVX512
-func (x Float32x16) Min(y Float32x16) Float32x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VMINPD, CPU Feature: AVX
-func (x Float64x2) Min(y Float64x2) Float64x2
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VMINPD, CPU Feature: AVX
-func (x Float64x4) Min(y Float64x4) Float64x4
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VMINPD, CPU Feature: AVX512
-func (x Float64x8) Min(y Float64x8) Float64x8
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSB, CPU Feature: AVX
-func (x Int8x16) Min(y Int8x16) Int8x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSB, CPU Feature: AVX2
-func (x Int8x32) Min(y Int8x32) Int8x32
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSB, CPU Feature: AVX512
-func (x Int8x64) Min(y Int8x64) Int8x64
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSW, CPU Feature: AVX
-func (x Int16x8) Min(y Int16x8) Int16x8
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSW, CPU Feature: AVX2
-func (x Int16x16) Min(y Int16x16) Int16x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSW, CPU Feature: AVX512
-func (x Int16x32) Min(y Int16x32) Int16x32
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSD, CPU Feature: AVX
-func (x Int32x4) Min(y Int32x4) Int32x4
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSD, CPU Feature: AVX2
-func (x Int32x8) Min(y Int32x8) Int32x8
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSD, CPU Feature: AVX512
-func (x Int32x16) Min(y Int32x16) Int32x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSQ, CPU Feature: AVX512
-func (x Int64x2) Min(y Int64x2) Int64x2
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSQ, CPU Feature: AVX512
-func (x Int64x4) Min(y Int64x4) Int64x4
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINSQ, CPU Feature: AVX512
-func (x Int64x8) Min(y Int64x8) Int64x8
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUB, CPU Feature: AVX
-func (x Uint8x16) Min(y Uint8x16) Uint8x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUB, CPU Feature: AVX2
-func (x Uint8x32) Min(y Uint8x32) Uint8x32
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUB, CPU Feature: AVX512
-func (x Uint8x64) Min(y Uint8x64) Uint8x64
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUW, CPU Feature: AVX
-func (x Uint16x8) Min(y Uint16x8) Uint16x8
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUW, CPU Feature: AVX2
-func (x Uint16x16) Min(y Uint16x16) Uint16x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUW, CPU Feature: AVX512
-func (x Uint16x32) Min(y Uint16x32) Uint16x32
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUD, CPU Feature: AVX
-func (x Uint32x4) Min(y Uint32x4) Uint32x4
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUD, CPU Feature: AVX2
-func (x Uint32x8) Min(y Uint32x8) Uint32x8
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUD, CPU Feature: AVX512
-func (x Uint32x16) Min(y Uint32x16) Uint32x16
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUQ, CPU Feature: AVX512
-func (x Uint64x2) Min(y Uint64x2) Uint64x2
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUQ, CPU Feature: AVX512
-func (x Uint64x4) Min(y Uint64x4) Uint64x4
-
-// Min computes the minimum of corresponding elements.
-//
-// Asm: VPMINUQ, CPU Feature: AVX512
-func (x Uint64x8) Min(y Uint64x8) Uint64x8
-
-/* MinMasked */
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMINPS, CPU Feature: AVX512
-func (x Float32x4) MinMasked(y Float32x4, mask Mask32x4) Float32x4
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMINPS, CPU Feature: AVX512
-func (x Float32x8) MinMasked(y Float32x8, mask Mask32x8) Float32x8
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMINPS, CPU Feature: AVX512
-func (x Float32x16) MinMasked(y Float32x16, mask Mask32x16) Float32x16
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMINPD, CPU Feature: AVX512
-func (x Float64x2) MinMasked(y Float64x2, mask Mask64x2) Float64x2
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMINPD, CPU Feature: AVX512
-func (x Float64x4) MinMasked(y Float64x4, mask Mask64x4) Float64x4
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMINPD, CPU Feature: AVX512
-func (x Float64x8) MinMasked(y Float64x8, mask Mask64x8) Float64x8
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSB, CPU Feature: AVX512
-func (x Int8x16) MinMasked(y Int8x16, mask Mask8x16) Int8x16
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSB, CPU Feature: AVX512
-func (x Int8x32) MinMasked(y Int8x32, mask Mask8x32) Int8x32
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSB, CPU Feature: AVX512
-func (x Int8x64) MinMasked(y Int8x64, mask Mask8x64) Int8x64
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSW, CPU Feature: AVX512
-func (x Int16x8) MinMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSW, CPU Feature: AVX512
-func (x Int16x16) MinMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSW, CPU Feature: AVX512
-func (x Int16x32) MinMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSD, CPU Feature: AVX512
-func (x Int32x4) MinMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSD, CPU Feature: AVX512
-func (x Int32x8) MinMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSD, CPU Feature: AVX512
-func (x Int32x16) MinMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSQ, CPU Feature: AVX512
-func (x Int64x2) MinMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSQ, CPU Feature: AVX512
-func (x Int64x4) MinMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINSQ, CPU Feature: AVX512
-func (x Int64x8) MinMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUB, CPU Feature: AVX512
-func (x Uint8x16) MinMasked(y Uint8x16, mask Mask8x16) Uint8x16
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUB, CPU Feature: AVX512
-func (x Uint8x32) MinMasked(y Uint8x32, mask Mask8x32) Uint8x32
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUB, CPU Feature: AVX512
-func (x Uint8x64) MinMasked(y Uint8x64, mask Mask8x64) Uint8x64
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUW, CPU Feature: AVX512
-func (x Uint16x8) MinMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUW, CPU Feature: AVX512
-func (x Uint16x16) MinMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUW, CPU Feature: AVX512
-func (x Uint16x32) MinMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUD, CPU Feature: AVX512
-func (x Uint32x4) MinMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUD, CPU Feature: AVX512
-func (x Uint32x8) MinMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUD, CPU Feature: AVX512
-func (x Uint32x16) MinMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUQ, CPU Feature: AVX512
-func (x Uint64x2) MinMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUQ, CPU Feature: AVX512
-func (x Uint64x4) MinMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// MinMasked computes the minimum of corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMINUQ, CPU Feature: AVX512
-func (x Uint64x8) MinMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
-/* Mul */
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VMULPS, CPU Feature: AVX
-func (x Float32x4) Mul(y Float32x4) Float32x4
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VMULPS, CPU Feature: AVX
-func (x Float32x8) Mul(y Float32x8) Float32x8
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VMULPS, CPU Feature: AVX512
-func (x Float32x16) Mul(y Float32x16) Float32x16
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VMULPD, CPU Feature: AVX
-func (x Float64x2) Mul(y Float64x2) Float64x2
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VMULPD, CPU Feature: AVX
-func (x Float64x4) Mul(y Float64x4) Float64x4
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VMULPD, CPU Feature: AVX512
-func (x Float64x8) Mul(y Float64x8) Float64x8
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLW, CPU Feature: AVX
-func (x Int16x8) Mul(y Int16x8) Int16x8
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLW, CPU Feature: AVX2
-func (x Int16x16) Mul(y Int16x16) Int16x16
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLW, CPU Feature: AVX512
-func (x Int16x32) Mul(y Int16x32) Int16x32
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLD, CPU Feature: AVX
-func (x Int32x4) Mul(y Int32x4) Int32x4
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLD, CPU Feature: AVX2
-func (x Int32x8) Mul(y Int32x8) Int32x8
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLD, CPU Feature: AVX512
-func (x Int32x16) Mul(y Int32x16) Int32x16
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Int64x2) Mul(y Int64x2) Int64x2
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Int64x4) Mul(y Int64x4) Int64x4
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Int64x8) Mul(y Int64x8) Int64x8
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLW, CPU Feature: AVX
-func (x Uint16x8) Mul(y Uint16x8) Uint16x8
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLW, CPU Feature: AVX2
-func (x Uint16x16) Mul(y Uint16x16) Uint16x16
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLW, CPU Feature: AVX512
-func (x Uint16x32) Mul(y Uint16x32) Uint16x32
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLD, CPU Feature: AVX
-func (x Uint32x4) Mul(y Uint32x4) Uint32x4
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLD, CPU Feature: AVX2
-func (x Uint32x8) Mul(y Uint32x8) Uint32x8
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLD, CPU Feature: AVX512
-func (x Uint32x16) Mul(y Uint32x16) Uint32x16
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Uint64x2) Mul(y Uint64x2) Uint64x2
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Uint64x4) Mul(y Uint64x4) Uint64x4
-
-// Mul multiplies corresponding elements of two vectors.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Uint64x8) Mul(y Uint64x8) Uint64x8
-
-/* MulAdd */
-
-// MulAdd performs a fused (x * y) + z.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512
-func (x Float32x4) MulAdd(y Float32x4, z Float32x4) Float32x4
-
-// MulAdd performs a fused (x * y) + z.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512
-func (x Float32x8) MulAdd(y Float32x8, z Float32x8) Float32x8
-
-// MulAdd performs a fused (x * y) + z.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512
-func (x Float32x16) MulAdd(y Float32x16, z Float32x16) Float32x16
-
-// MulAdd performs a fused (x * y) + z.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512
-func (x Float64x2) MulAdd(y Float64x2, z Float64x2) Float64x2
-
-// MulAdd performs a fused (x * y) + z.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512
-func (x Float64x4) MulAdd(y Float64x4, z Float64x4) Float64x4
-
-// MulAdd performs a fused (x * y) + z.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512
-func (x Float64x8) MulAdd(y Float64x8, z Float64x8) Float64x8
-
-/* MulAddMasked */
-
-// MulAddMasked performs a fused (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512
-func (x Float32x4) MulAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
-
-// MulAddMasked performs a fused (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512
-func (x Float32x8) MulAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
-
-// MulAddMasked performs a fused (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PS, CPU Feature: AVX512
-func (x Float32x16) MulAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
-
-// MulAddMasked performs a fused (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512
-func (x Float64x2) MulAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
-
-// MulAddMasked performs a fused (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512
-func (x Float64x4) MulAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
-
-// MulAddMasked performs a fused (x * y) + z.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADD213PD, CPU Feature: AVX512
-func (x Float64x8) MulAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
-
-/* MulAddSub */
-
-// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512
-func (x Float32x4) MulAddSub(y Float32x4, z Float32x4) Float32x4
-
-// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512
-func (x Float32x8) MulAddSub(y Float32x8, z Float32x8) Float32x8
-
-// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512
-func (x Float32x16) MulAddSub(y Float32x16, z Float32x16) Float32x16
-
-// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512
-func (x Float64x2) MulAddSub(y Float64x2, z Float64x2) Float64x2
-
-// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512
-func (x Float64x4) MulAddSub(y Float64x4, z Float64x4) Float64x4
-
-// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512
-func (x Float64x8) MulAddSub(y Float64x8, z Float64x8) Float64x8
-
-/* MulAddSubMasked */
-
-// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512
-func (x Float32x4) MulAddSubMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
-
-// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512
-func (x Float32x8) MulAddSubMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
-
-// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PS, CPU Feature: AVX512
-func (x Float32x16) MulAddSubMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
-
-// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512
-func (x Float64x2) MulAddSubMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
-
-// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512
-func (x Float64x4) MulAddSubMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
-
-// MulAddSubMasked performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMADDSUB213PD, CPU Feature: AVX512
-func (x Float64x8) MulAddSubMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
-
-/* MulEvenWiden */
-
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
-//
-// Asm: VPMULDQ, CPU Feature: AVX
-func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2
-
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
-//
-// Asm: VPMULDQ, CPU Feature: AVX2
-func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4
-
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
-//
-// Asm: VPMULUDQ, CPU Feature: AVX
-func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2
-
-// MulEvenWiden multiplies even-indexed elements, widening the result.
-// Result[i] = v1.Even[i] * v2.Even[i].
-//
-// Asm: VPMULUDQ, CPU Feature: AVX2
-func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4
-
-/* MulHigh */
-
-// MulHigh multiplies elements and stores the high part of the result.
-//
-// Asm: VPMULHW, CPU Feature: AVX
-func (x Int16x8) MulHigh(y Int16x8) Int16x8
-
-// MulHigh multiplies elements and stores the high part of the result.
-//
-// Asm: VPMULHW, CPU Feature: AVX2
-func (x Int16x16) MulHigh(y Int16x16) Int16x16
-
-// MulHigh multiplies elements and stores the high part of the result.
-//
-// Asm: VPMULHW, CPU Feature: AVX512
-func (x Int16x32) MulHigh(y Int16x32) Int16x32
-
-// MulHigh multiplies elements and stores the high part of the result.
-//
-// Asm: VPMULHUW, CPU Feature: AVX
-func (x Uint16x8) MulHigh(y Uint16x8) Uint16x8
-
-// MulHigh multiplies elements and stores the high part of the result.
-//
-// Asm: VPMULHUW, CPU Feature: AVX2
-func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16
-
-// MulHigh multiplies elements and stores the high part of the result.
-//
-// Asm: VPMULHUW, CPU Feature: AVX512
-func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32
-
-/* MulHighMasked */
-
-// MulHighMasked multiplies elements and stores the high part of the result.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULHW, CPU Feature: AVX512
-func (x Int16x8) MulHighMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// MulHighMasked multiplies elements and stores the high part of the result.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULHW, CPU Feature: AVX512
-func (x Int16x16) MulHighMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// MulHighMasked multiplies elements and stores the high part of the result.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULHW, CPU Feature: AVX512
-func (x Int16x32) MulHighMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// MulHighMasked multiplies elements and stores the high part of the result.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULHUW, CPU Feature: AVX512
-func (x Uint16x8) MulHighMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// MulHighMasked multiplies elements and stores the high part of the result.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULHUW, CPU Feature: AVX512
-func (x Uint16x16) MulHighMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// MulHighMasked multiplies elements and stores the high part of the result.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULHUW, CPU Feature: AVX512
-func (x Uint16x32) MulHighMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
-/* MulMasked */
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMULPS, CPU Feature: AVX512
-func (x Float32x4) MulMasked(y Float32x4, mask Mask32x4) Float32x4
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMULPS, CPU Feature: AVX512
-func (x Float32x8) MulMasked(y Float32x8, mask Mask32x8) Float32x8
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMULPS, CPU Feature: AVX512
-func (x Float32x16) MulMasked(y Float32x16, mask Mask32x16) Float32x16
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMULPD, CPU Feature: AVX512
-func (x Float64x2) MulMasked(y Float64x2, mask Mask64x2) Float64x2
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMULPD, CPU Feature: AVX512
-func (x Float64x4) MulMasked(y Float64x4, mask Mask64x4) Float64x4
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VMULPD, CPU Feature: AVX512
-func (x Float64x8) MulMasked(y Float64x8, mask Mask64x8) Float64x8
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLW, CPU Feature: AVX512
-func (x Int16x8) MulMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLW, CPU Feature: AVX512
-func (x Int16x16) MulMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLW, CPU Feature: AVX512
-func (x Int16x32) MulMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLD, CPU Feature: AVX512
-func (x Int32x4) MulMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLD, CPU Feature: AVX512
-func (x Int32x8) MulMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLD, CPU Feature: AVX512
-func (x Int32x16) MulMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Int64x2) MulMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Int64x4) MulMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Int64x8) MulMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLW, CPU Feature: AVX512
-func (x Uint16x8) MulMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLW, CPU Feature: AVX512
-func (x Uint16x16) MulMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLW, CPU Feature: AVX512
-func (x Uint16x32) MulMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLD, CPU Feature: AVX512
-func (x Uint32x4) MulMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLD, CPU Feature: AVX512
-func (x Uint32x8) MulMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLD, CPU Feature: AVX512
-func (x Uint32x16) MulMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Uint64x2) MulMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Uint64x4) MulMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// MulMasked multiplies corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPMULLQ, CPU Feature: AVX512
-func (x Uint64x8) MulMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
-/* MulSubAdd */
-
-// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512
-func (x Float32x4) MulSubAdd(y Float32x4, z Float32x4) Float32x4
-
-// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512
-func (x Float32x8) MulSubAdd(y Float32x8, z Float32x8) Float32x8
-
-// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512
-func (x Float32x16) MulSubAdd(y Float32x16, z Float32x16) Float32x16
-
-// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512
-func (x Float64x2) MulSubAdd(y Float64x2, z Float64x2) Float64x2
-
-// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512
-func (x Float64x4) MulSubAdd(y Float64x4, z Float64x4) Float64x4
-
-// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512
-func (x Float64x8) MulSubAdd(y Float64x8, z Float64x8) Float64x8
-
-/* MulSubAddMasked */
-
-// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512
-func (x Float32x4) MulSubAddMasked(y Float32x4, z Float32x4, mask Mask32x4) Float32x4
-
-// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512
-func (x Float32x8) MulSubAddMasked(y Float32x8, z Float32x8, mask Mask32x8) Float32x8
-
-// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PS, CPU Feature: AVX512
-func (x Float32x16) MulSubAddMasked(y Float32x16, z Float32x16, mask Mask32x16) Float32x16
-
-// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512
-func (x Float64x2) MulSubAddMasked(y Float64x2, z Float64x2, mask Mask64x2) Float64x2
-
-// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512
-func (x Float64x4) MulSubAddMasked(y Float64x4, z Float64x4, mask Mask64x4) Float64x4
-
-// MulSubAddMasked performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VFMSUBADD213PD, CPU Feature: AVX512
-func (x Float64x8) MulSubAddMasked(y Float64x8, z Float64x8, mask Mask64x8) Float64x8
-
-/* NotEqual */
-
-// NotEqual compares for inequality.
-//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) NotEqual(y Float32x4) Mask32x4
-
-// NotEqual compares for inequality.
-//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) NotEqual(y Float32x8) Mask32x8
-
-// NotEqual compares for inequality.
-//
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) NotEqual(y Float32x16) Mask32x16
-
-// NotEqual compares for inequality.
-//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) NotEqual(y Float64x2) Mask64x2
-
-// NotEqual compares for inequality.
-//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) NotEqual(y Float64x4) Mask64x4
-
-// NotEqual compares for inequality.
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) NotEqual(y Float64x8) Mask64x8
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) NotEqual(y Int8x64) Mask8x64
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) NotEqual(y Int16x32) Mask16x32
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) NotEqual(y Int32x16) Mask32x16
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) NotEqual(y Int64x8) Mask64x8
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8
-
-/* NotEqualMasked */
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x4) NotEqualMasked(y Float32x4, mask Mask32x4) Mask32x4
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x8) NotEqualMasked(y Float32x8, mask Mask32x8) Mask32x8
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) NotEqualMasked(y Float32x16, mask Mask32x16) Mask32x16
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x2) NotEqualMasked(y Float64x2, mask Mask64x2) Mask64x2
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x4) NotEqualMasked(y Float64x4, mask Mask64x4) Mask64x4
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) NotEqualMasked(y Float64x8, mask Mask64x8) Mask64x8
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) NotEqualMasked(y Int8x16, mask Mask8x16) Mask8x16
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) NotEqualMasked(y Int8x32, mask Mask8x32) Mask8x32
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x64) NotEqualMasked(y Int8x64, mask Mask8x64) Mask8x64
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) NotEqualMasked(y Int16x8, mask Mask16x8) Mask16x8
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) NotEqualMasked(y Int16x16, mask Mask16x16) Mask16x16
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x32) NotEqualMasked(y Int16x32, mask Mask16x32) Mask16x32
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) NotEqualMasked(y Int32x4, mask Mask32x4) Mask32x4
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) NotEqualMasked(y Int32x8, mask Mask32x8) Mask32x8
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x16) NotEqualMasked(y Int32x16, mask Mask32x16) Mask32x16
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) NotEqualMasked(y Int64x2, mask Mask64x2) Mask64x2
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) NotEqualMasked(y Int64x4, mask Mask64x4) Mask64x4
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x8) NotEqualMasked(y Int64x8, mask Mask64x8) Mask64x8
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) NotEqualMasked(y Uint8x16, mask Mask8x16) Mask8x16
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) NotEqualMasked(y Uint8x32, mask Mask8x32) Mask8x32
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x64) NotEqualMasked(y Uint8x64, mask Mask8x64) Mask8x64
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) NotEqualMasked(y Uint16x8, mask Mask16x8) Mask16x8
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) NotEqualMasked(y Uint16x16, mask Mask16x16) Mask16x16
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x32) NotEqualMasked(y Uint16x32, mask Mask16x32) Mask16x32
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) NotEqualMasked(y Uint32x4, mask Mask32x4) Mask32x4
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) NotEqualMasked(y Uint32x8, mask Mask32x8) Mask32x8
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x16) NotEqualMasked(y Uint32x16, mask Mask32x16) Mask32x16
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) NotEqualMasked(y Uint64x2, mask Mask64x2) Mask64x2
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) NotEqualMasked(y Uint64x4, mask Mask64x4) Mask64x4
-
-// NotEqualMasked compares for inequality.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x8) NotEqualMasked(y Uint64x8, mask Mask64x8) Mask64x8
-
-/* OnesCount */
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x16) OnesCount() Int8x16
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x32) OnesCount() Int8x32
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x64) OnesCount() Int8x64
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x8) OnesCount() Int16x8
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x16) OnesCount() Int16x16
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x32) OnesCount() Int16x32
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x4) OnesCount() Int32x4
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x8) OnesCount() Int32x8
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x16) OnesCount() Int32x16
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x2) OnesCount() Int64x2
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x4) OnesCount() Int64x4
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x8) OnesCount() Int64x8
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x16) OnesCount() Uint8x16
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x32) OnesCount() Uint8x32
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x64) OnesCount() Uint8x64
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x8) OnesCount() Uint16x8
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x16) OnesCount() Uint16x16
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x32) OnesCount() Uint16x32
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x4) OnesCount() Uint32x4
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x8) OnesCount() Uint32x8
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x16) OnesCount() Uint32x16
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x2) OnesCount() Uint64x2
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x4) OnesCount() Uint64x4
-
-// OnesCount counts the number of set bits in each element.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x8) OnesCount() Uint64x8
-
-/* OnesCountMasked */
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x16) OnesCountMasked(mask Mask8x16) Int8x16
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x32) OnesCountMasked(mask Mask8x32) Int8x32
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Int8x64) OnesCountMasked(mask Mask8x64) Int8x64
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x8) OnesCountMasked(mask Mask16x8) Int16x8
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x16) OnesCountMasked(mask Mask16x16) Int16x16
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Int16x32) OnesCountMasked(mask Mask16x32) Int16x32
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x4) OnesCountMasked(mask Mask32x4) Int32x4
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x8) OnesCountMasked(mask Mask32x8) Int32x8
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Int32x16) OnesCountMasked(mask Mask32x16) Int32x16
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x2) OnesCountMasked(mask Mask64x2) Int64x2
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x4) OnesCountMasked(mask Mask64x4) Int64x4
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Int64x8) OnesCountMasked(mask Mask64x8) Int64x8
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x16) OnesCountMasked(mask Mask8x16) Uint8x16
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x32) OnesCountMasked(mask Mask8x32) Uint8x32
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTB, CPU Feature: AVX512BITALG
-func (x Uint8x64) OnesCountMasked(mask Mask8x64) Uint8x64
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x8) OnesCountMasked(mask Mask16x8) Uint16x8
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x16) OnesCountMasked(mask Mask16x16) Uint16x16
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTW, CPU Feature: AVX512BITALG
-func (x Uint16x32) OnesCountMasked(mask Mask16x32) Uint16x32
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x4) OnesCountMasked(mask Mask32x4) Uint32x4
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x8) OnesCountMasked(mask Mask32x8) Uint32x8
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint32x16) OnesCountMasked(mask Mask32x16) Uint32x16
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x2) OnesCountMasked(mask Mask64x2) Uint64x2
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x4) OnesCountMasked(mask Mask64x4) Uint64x4
-
-// OnesCountMasked counts the number of set bits in each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ
-func (x Uint64x8) OnesCountMasked(mask Mask64x8) Uint64x8
-
-/* Or */
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX
-func (x Int8x16) Or(y Int8x16) Int8x16
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Int8x32) Or(y Int8x32) Int8x32
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Int8x64) Or(y Int8x64) Int8x64
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX
-func (x Int16x8) Or(y Int16x8) Int16x8
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Int16x16) Or(y Int16x16) Int16x16
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Int16x32) Or(y Int16x32) Int16x32
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX
-func (x Int32x4) Or(y Int32x4) Int32x4
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Int32x8) Or(y Int32x8) Int32x8
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Int32x16) Or(y Int32x16) Int32x16
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX
-func (x Int64x2) Or(y Int64x2) Int64x2
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Int64x4) Or(y Int64x4) Int64x4
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPORQ, CPU Feature: AVX512
-func (x Int64x8) Or(y Int64x8) Int64x8
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX
-func (x Uint8x16) Or(y Uint8x16) Uint8x16
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Uint8x32) Or(y Uint8x32) Uint8x32
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Uint8x64) Or(y Uint8x64) Uint8x64
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX
-func (x Uint16x8) Or(y Uint16x8) Uint16x8
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Uint16x16) Or(y Uint16x16) Uint16x16
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Uint16x32) Or(y Uint16x32) Uint16x32
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX
-func (x Uint32x4) Or(y Uint32x4) Uint32x4
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Uint32x8) Or(y Uint32x8) Uint32x8
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Uint32x16) Or(y Uint32x16) Uint32x16
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX
-func (x Uint64x2) Or(y Uint64x2) Uint64x2
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPOR, CPU Feature: AVX2
-func (x Uint64x4) Or(y Uint64x4) Uint64x4
-
-// Or performs a bitwise OR operation between two vectors.
-//
-// Asm: VPORQ, CPU Feature: AVX512
-func (x Uint64x8) Or(y Uint64x8) Uint64x8
-
-/* OrMasked */
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Int32x4) OrMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Int32x8) OrMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Int32x16) OrMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORQ, CPU Feature: AVX512
-func (x Int64x2) OrMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORQ, CPU Feature: AVX512
-func (x Int64x4) OrMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORQ, CPU Feature: AVX512
-func (x Int64x8) OrMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Uint32x4) OrMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Uint32x8) OrMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORD, CPU Feature: AVX512
-func (x Uint32x16) OrMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORQ, CPU Feature: AVX512
-func (x Uint64x2) OrMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORQ, CPU Feature: AVX512
-func (x Uint64x4) OrMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// OrMasked performs a bitwise OR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPORQ, CPU Feature: AVX512
-func (x Uint64x8) OrMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
-/* Permute */
-
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x16) Permute(indices Uint8x16) Int8x16
-
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x16) Permute(indices Uint8x16) Uint8x16
-
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x32) Permute(indices Uint8x32) Int8x32
+// Asm: VPOR, CPU Feature: AVX
+func (x Int64x2) Or(y Int64x2) Int64x2
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x32) Permute(indices Uint8x32) Uint8x32
+// Asm: VPOR, CPU Feature: AVX2
+func (x Int64x4) Or(y Int64x4) Int64x4
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x64) Permute(indices Uint8x64) Int8x64
+// Asm: VPORQ, CPU Feature: AVX512
+func (x Int64x8) Or(y Int64x8) Int64x8
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x64) Permute(indices Uint8x64) Uint8x64
+// Asm: VPOR, CPU Feature: AVX
+func (x Uint8x16) Or(y Uint8x16) Uint8x16
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Int16x8) Permute(indices Uint16x8) Int16x8
+// Asm: VPOR, CPU Feature: AVX2
+func (x Uint8x32) Or(y Uint8x32) Uint8x32
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Uint16x8) Permute(indices Uint16x8) Uint16x8
+// Asm: VPORD, CPU Feature: AVX512
+func (x Uint8x64) Or(y Uint8x64) Uint8x64
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Int16x16) Permute(indices Uint16x16) Int16x16
+// Asm: VPOR, CPU Feature: AVX
+func (x Uint16x8) Or(y Uint16x8) Uint16x8
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Uint16x16) Permute(indices Uint16x16) Uint16x16
+// Asm: VPOR, CPU Feature: AVX2
+func (x Uint16x16) Or(y Uint16x16) Uint16x16
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Int16x32) Permute(indices Uint16x32) Int16x32
+// Asm: VPORD, CPU Feature: AVX512
+func (x Uint16x32) Or(y Uint16x32) Uint16x32
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Uint16x32) Permute(indices Uint16x32) Uint16x32
+// Asm: VPOR, CPU Feature: AVX
+func (x Uint32x4) Or(y Uint32x4) Uint32x4
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMPS, CPU Feature: AVX2
-func (x Float32x8) Permute(indices Uint32x8) Float32x8
+// Asm: VPOR, CPU Feature: AVX2
+func (x Uint32x8) Or(y Uint32x8) Uint32x8
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMD, CPU Feature: AVX2
-func (x Int32x8) Permute(indices Uint32x8) Int32x8
+// Asm: VPORD, CPU Feature: AVX512
+func (x Uint32x16) Or(y Uint32x16) Uint32x16
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMD, CPU Feature: AVX2
-func (x Uint32x8) Permute(indices Uint32x8) Uint32x8
+// Asm: VPOR, CPU Feature: AVX
+func (x Uint64x2) Or(y Uint64x2) Uint64x2
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMPS, CPU Feature: AVX512
-func (x Float32x16) Permute(indices Uint32x16) Float32x16
+// Asm: VPOR, CPU Feature: AVX2
+func (x Uint64x4) Or(y Uint64x4) Uint64x4
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Or performs a bitwise OR operation between two vectors.
 //
-// Asm: VPERMD, CPU Feature: AVX512
-func (x Int32x16) Permute(indices Uint32x16) Int32x16
+// Asm: VPORQ, CPU Feature: AVX512
+func (x Uint64x8) Or(y Uint64x8) Uint64x8
 
-// Permute performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// Asm: VPERMD, CPU Feature: AVX512
-func (x Uint32x16) Permute(indices Uint32x16) Uint32x16
+/* Permute */
 
 // Permute performs a full permutation of vector x using indices:
 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
 // Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMPD, CPU Feature: AVX512
-func (x Float64x4) Permute(indices Uint64x4) Float64x4
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Int8x16) Permute(indices Uint8x16) Int8x16
 
 // Permute performs a full permutation of vector x using indices:
 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
 // Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMQ, CPU Feature: AVX512
-func (x Int64x4) Permute(indices Uint64x4) Int64x4
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Uint8x16) Permute(indices Uint8x16) Uint8x16
 
 // Permute performs a full permutation of vector x using indices:
 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
 // Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMQ, CPU Feature: AVX512
-func (x Uint64x4) Permute(indices Uint64x4) Uint64x4
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Int8x32) Permute(indices Uint8x32) Int8x32
 
 // Permute performs a full permutation of vector x using indices:
 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
 // Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMPD, CPU Feature: AVX512
-func (x Float64x8) Permute(indices Uint64x8) Float64x8
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Uint8x32) Permute(indices Uint8x32) Uint8x32
 
 // Permute performs a full permutation of vector x using indices:
 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
 // Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMQ, CPU Feature: AVX512
-func (x Int64x8) Permute(indices Uint64x8) Int64x8
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Int8x64) Permute(indices Uint8x64) Int8x64
 
 // Permute performs a full permutation of vector x using indices:
 // result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// Asm: VPERMQ, CPU Feature: AVX512
-func (x Uint64x8) Permute(indices Uint64x8) Uint64x8
-
-/* Permute2 */
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x16) Permute2(y Int8x16, indices Uint8x16) Int8x16
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x16) Permute2(y Uint8x16, indices Uint8x16) Uint8x16
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x32) Permute2(y Int8x32, indices Uint8x32) Int8x32
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x32) Permute2(y Uint8x32, indices Uint8x32) Uint8x32
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x64) Permute2(y Int8x64, indices Uint8x64) Int8x64
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x64) Permute2(y Uint8x64, indices Uint8x64) Uint8x64
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Int16x8) Permute2(y Int16x8, indices Uint16x8) Int16x8
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Uint16x8) Permute2(y Uint16x8, indices Uint16x8) Uint16x8
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Int16x16) Permute2(y Int16x16, indices Uint16x16) Int16x16
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Uint16x16) Permute2(y Uint16x16, indices Uint16x16) Uint16x16
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Int16x32) Permute2(y Int16x32, indices Uint16x32) Int16x32
-
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Uint16x32) Permute2(y Uint16x32, indices Uint16x32) Uint16x32
+// Asm: VPERMB, CPU Feature: AVX512VBMI
+func (x Uint8x64) Permute(indices Uint8x64) Uint8x64
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2PS, CPU Feature: AVX512
-func (x Float32x4) Permute2(y Float32x4, indices Uint32x4) Float32x4
+// Asm: VPERMW, CPU Feature: AVX512
+func (x Int16x8) Permute(indices Uint16x8) Int16x8
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2D, CPU Feature: AVX512
-func (x Int32x4) Permute2(y Int32x4, indices Uint32x4) Int32x4
+// Asm: VPERMW, CPU Feature: AVX512
+func (x Uint16x8) Permute(indices Uint16x8) Uint16x8
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2D, CPU Feature: AVX512
-func (x Uint32x4) Permute2(y Uint32x4, indices Uint32x4) Uint32x4
+// Asm: VPERMW, CPU Feature: AVX512
+func (x Int16x16) Permute(indices Uint16x16) Int16x16
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2PS, CPU Feature: AVX512
-func (x Float32x8) Permute2(y Float32x8, indices Uint32x8) Float32x8
+// Asm: VPERMW, CPU Feature: AVX512
+func (x Uint16x16) Permute(indices Uint16x16) Uint16x16
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2D, CPU Feature: AVX512
-func (x Int32x8) Permute2(y Int32x8, indices Uint32x8) Int32x8
+// Asm: VPERMW, CPU Feature: AVX512
+func (x Int16x32) Permute(indices Uint16x32) Int16x32
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2D, CPU Feature: AVX512
-func (x Uint32x8) Permute2(y Uint32x8, indices Uint32x8) Uint32x8
+// Asm: VPERMW, CPU Feature: AVX512
+func (x Uint16x32) Permute(indices Uint16x32) Uint16x32
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2PS, CPU Feature: AVX512
-func (x Float32x16) Permute2(y Float32x16, indices Uint32x16) Float32x16
+// Asm: VPERMPS, CPU Feature: AVX2
+func (x Float32x8) Permute(indices Uint32x8) Float32x8
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2D, CPU Feature: AVX512
-func (x Int32x16) Permute2(y Int32x16, indices Uint32x16) Int32x16
+// Asm: VPERMD, CPU Feature: AVX2
+func (x Int32x8) Permute(indices Uint32x8) Int32x8
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2D, CPU Feature: AVX512
-func (x Uint32x16) Permute2(y Uint32x16, indices Uint32x16) Uint32x16
+// Asm: VPERMD, CPU Feature: AVX2
+func (x Uint32x8) Permute(indices Uint32x8) Uint32x8
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2PD, CPU Feature: AVX512
-func (x Float64x2) Permute2(y Float64x2, indices Uint64x2) Float64x2
+// Asm: VPERMPS, CPU Feature: AVX512
+func (x Float32x16) Permute(indices Uint32x16) Float32x16
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Int64x2) Permute2(y Int64x2, indices Uint64x2) Int64x2
+// Asm: VPERMD, CPU Feature: AVX512
+func (x Int32x16) Permute(indices Uint32x16) Int32x16
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Uint64x2) Permute2(y Uint64x2, indices Uint64x2) Uint64x2
+// Asm: VPERMD, CPU Feature: AVX512
+func (x Uint32x16) Permute(indices Uint32x16) Uint32x16
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2PD, CPU Feature: AVX512
-func (x Float64x4) Permute2(y Float64x4, indices Uint64x4) Float64x4
+// Asm: VPERMPD, CPU Feature: AVX512
+func (x Float64x4) Permute(indices Uint64x4) Float64x4
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Int64x4) Permute2(y Int64x4, indices Uint64x4) Int64x4
+// Asm: VPERMQ, CPU Feature: AVX512
+func (x Int64x4) Permute(indices Uint64x4) Int64x4
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Uint64x4) Permute2(y Uint64x4, indices Uint64x4) Uint64x4
+// Asm: VPERMQ, CPU Feature: AVX512
+func (x Uint64x4) Permute(indices Uint64x4) Uint64x4
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2PD, CPU Feature: AVX512
-func (x Float64x8) Permute2(y Float64x8, indices Uint64x8) Float64x8
+// Asm: VPERMPD, CPU Feature: AVX512
+func (x Float64x8) Permute(indices Uint64x8) Float64x8
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Int64x8) Permute2(y Int64x8, indices Uint64x8) Int64x8
+// Asm: VPERMQ, CPU Feature: AVX512
+func (x Int64x8) Permute(indices Uint64x8) Int64x8
 
-// Permute2 performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
+// Permute performs a full permutation of vector x using indices:
+// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
+// Only the needed bits to represent x's index are used in indices' elements.
 //
-// Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Uint64x8) Permute2(y Uint64x8, indices Uint64x8) Uint64x8
+// Asm: VPERMQ, CPU Feature: AVX512
+func (x Uint64x8) Permute(indices Uint64x8) Uint64x8
 
-/* Permute2Masked */
+/* Permute2 */
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x16) Permute2Masked(y Int8x16, indices Uint8x16, mask Mask8x16) Int8x16
+func (x Int8x16) Permute2(y Int8x16, indices Uint8x16) Int8x16
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x16) Permute2Masked(y Uint8x16, indices Uint8x16, mask Mask8x16) Uint8x16
+func (x Uint8x16) Permute2(y Uint8x16, indices Uint8x16) Uint8x16
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x32) Permute2Masked(y Int8x32, indices Uint8x32, mask Mask8x32) Int8x32
+func (x Int8x32) Permute2(y Int8x32, indices Uint8x32) Int8x32
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x32) Permute2Masked(y Uint8x32, indices Uint8x32, mask Mask8x32) Uint8x32
+func (x Uint8x32) Permute2(y Uint8x32, indices Uint8x32) Uint8x32
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Int8x64) Permute2Masked(y Int8x64, indices Uint8x64, mask Mask8x64) Int8x64
+func (x Int8x64) Permute2(y Int8x64, indices Uint8x64) Int8x64
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2B, CPU Feature: AVX512VBMI
-func (x Uint8x64) Permute2Masked(y Uint8x64, indices Uint8x64, mask Mask8x64) Uint8x64
-
-// Permute2Masked performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Int16x8) Permute2Masked(y Int16x8, indices Uint16x8, mask Mask16x8) Int16x8
-
-// Permute2Masked performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Uint16x8) Permute2Masked(y Uint16x8, indices Uint16x8, mask Mask16x8) Uint16x8
-
-// Permute2Masked performs a full permutation of vector x, y using indices:
-// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
-// where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2W, CPU Feature: AVX512
-func (x Int16x16) Permute2Masked(y Int16x16, indices Uint16x16, mask Mask16x16) Int16x16
+func (x Uint8x64) Permute2(y Uint8x64, indices Uint8x64) Uint8x64
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
-// Only the needed bits to represent xy's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
+// Only the needed bits to represent xy's index are used in indices' elements.
 //
 // Asm: VPERMI2W, CPU Feature: AVX512
-func (x Uint16x16) Permute2Masked(y Uint16x16, indices Uint16x16, mask Mask16x16) Uint16x16
+func (x Int16x8) Permute2(y Int16x8, indices Uint16x8) Int16x8
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2W, CPU Feature: AVX512
-func (x Int16x32) Permute2Masked(y Int16x32, indices Uint16x32, mask Mask16x32) Int16x32
+func (x Uint16x8) Permute2(y Uint16x8, indices Uint16x8) Uint16x8
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2W, CPU Feature: AVX512
-func (x Uint16x32) Permute2Masked(y Uint16x32, indices Uint16x32, mask Mask16x32) Uint16x32
+func (x Int16x16) Permute2(y Int16x16, indices Uint16x16) Int16x16
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2PS, CPU Feature: AVX512
-func (x Float32x4) Permute2Masked(y Float32x4, indices Uint32x4, mask Mask32x4) Float32x4
+// Asm: VPERMI2W, CPU Feature: AVX512
+func (x Uint16x16) Permute2(y Uint16x16, indices Uint16x16) Uint16x16
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2D, CPU Feature: AVX512
-func (x Int32x4) Permute2Masked(y Int32x4, indices Uint32x4, mask Mask32x4) Int32x4
+// Asm: VPERMI2W, CPU Feature: AVX512
+func (x Int16x32) Permute2(y Int16x32, indices Uint16x32) Int16x32
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2D, CPU Feature: AVX512
-func (x Uint32x4) Permute2Masked(y Uint32x4, indices Uint32x4, mask Mask32x4) Uint32x4
+// Asm: VPERMI2W, CPU Feature: AVX512
+func (x Uint16x32) Permute2(y Uint16x32, indices Uint16x32) Uint16x32
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2PS, CPU Feature: AVX512
-func (x Float32x8) Permute2Masked(y Float32x8, indices Uint32x8, mask Mask32x8) Float32x8
+func (x Float32x4) Permute2(y Float32x4, indices Uint32x4) Float32x4
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2D, CPU Feature: AVX512
-func (x Int32x8) Permute2Masked(y Int32x8, indices Uint32x8, mask Mask32x8) Int32x8
+func (x Int32x4) Permute2(y Int32x4, indices Uint32x4) Int32x4
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2D, CPU Feature: AVX512
-func (x Uint32x8) Permute2Masked(y Uint32x8, indices Uint32x8, mask Mask32x8) Uint32x8
+func (x Uint32x4) Permute2(y Uint32x4, indices Uint32x4) Uint32x4
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2PS, CPU Feature: AVX512
-func (x Float32x16) Permute2Masked(y Float32x16, indices Uint32x16, mask Mask32x16) Float32x16
+func (x Float32x8) Permute2(y Float32x8, indices Uint32x8) Float32x8
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2D, CPU Feature: AVX512
-func (x Int32x16) Permute2Masked(y Int32x16, indices Uint32x16, mask Mask32x16) Int32x16
+func (x Int32x8) Permute2(y Int32x8, indices Uint32x8) Int32x8
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2D, CPU Feature: AVX512
-func (x Uint32x16) Permute2Masked(y Uint32x16, indices Uint32x16, mask Mask32x16) Uint32x16
+func (x Uint32x8) Permute2(y Uint32x8, indices Uint32x8) Uint32x8
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2PD, CPU Feature: AVX512
-func (x Float64x2) Permute2Masked(y Float64x2, indices Uint64x2, mask Mask64x2) Float64x2
+// Asm: VPERMI2PS, CPU Feature: AVX512
+func (x Float32x16) Permute2(y Float32x16, indices Uint32x16) Float32x16
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Int64x2) Permute2Masked(y Int64x2, indices Uint64x2, mask Mask64x2) Int64x2
+// Asm: VPERMI2D, CPU Feature: AVX512
+func (x Int32x16) Permute2(y Int32x16, indices Uint32x16) Int32x16
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Uint64x2) Permute2Masked(y Uint64x2, indices Uint64x2, mask Mask64x2) Uint64x2
+// Asm: VPERMI2D, CPU Feature: AVX512
+func (x Uint32x16) Permute2(y Uint32x16, indices Uint32x16) Uint32x16
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2PD, CPU Feature: AVX512
-func (x Float64x4) Permute2Masked(y Float64x4, indices Uint64x4, mask Mask64x4) Float64x4
+func (x Float64x2) Permute2(y Float64x2, indices Uint64x2) Float64x2
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Int64x4) Permute2Masked(y Int64x4, indices Uint64x4, mask Mask64x4) Int64x4
+func (x Int64x2) Permute2(y Int64x2, indices Uint64x2) Int64x2
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Uint64x4) Permute2Masked(y Uint64x4, indices Uint64x4, mask Mask64x4) Uint64x4
+func (x Uint64x2) Permute2(y Uint64x2, indices Uint64x2) Uint64x2
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2PD, CPU Feature: AVX512
-func (x Float64x8) Permute2Masked(y Float64x8, indices Uint64x8, mask Mask64x8) Float64x8
+func (x Float64x4) Permute2(y Float64x4, indices Uint64x4) Float64x4
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Int64x8) Permute2Masked(y Int64x8, indices Uint64x8, mask Mask64x8) Int64x8
+func (x Int64x4) Permute2(y Int64x4, indices Uint64x4) Int64x4
 
-// Permute2Masked performs a full permutation of vector x, y using indices:
+// Permute2 performs a full permutation of vector x, y using indices:
 // result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
 // where xy is x appending y.
 // Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
-//
 // Asm: VPERMI2Q, CPU Feature: AVX512
-func (x Uint64x8) Permute2Masked(y Uint64x8, indices Uint64x8, mask Mask64x8) Uint64x8
-
-/* PermuteMasked */
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Int8x16
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x16) PermuteMasked(indices Uint8x16, mask Mask8x16) Uint8x16
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Int8x32
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x32) PermuteMasked(indices Uint8x32, mask Mask8x32) Uint8x32
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Int8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Int8x64
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMB, CPU Feature: AVX512VBMI
-func (x Uint8x64) PermuteMasked(indices Uint8x64, mask Mask8x64) Uint8x64
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Int16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Int16x8
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Uint16x8) PermuteMasked(indices Uint16x8, mask Mask16x8) Uint16x8
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Int16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Int16x16
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Uint16x16) PermuteMasked(indices Uint16x16, mask Mask16x16) Uint16x16
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Int16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Int16x32
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMW, CPU Feature: AVX512
-func (x Uint16x32) PermuteMasked(indices Uint16x32, mask Mask16x32) Uint16x32
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMPS, CPU Feature: AVX512
-func (x Float32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Float32x8
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMD, CPU Feature: AVX512
-func (x Int32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Int32x8
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMD, CPU Feature: AVX512
-func (x Uint32x8) PermuteMasked(indices Uint32x8, mask Mask32x8) Uint32x8
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMPS, CPU Feature: AVX512
-func (x Float32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Float32x16
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMD, CPU Feature: AVX512
-func (x Int32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Int32x16
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMD, CPU Feature: AVX512
-func (x Uint32x16) PermuteMasked(indices Uint32x16, mask Mask32x16) Uint32x16
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMPD, CPU Feature: AVX512
-func (x Float64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Float64x4
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMQ, CPU Feature: AVX512
-func (x Int64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Int64x4
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMQ, CPU Feature: AVX512
-func (x Uint64x4) PermuteMasked(indices Uint64x4, mask Mask64x4) Uint64x4
-
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPERMPD, CPU Feature: AVX512
-func (x Float64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Float64x8
+func (x Uint64x4) Permute2(y Uint64x4, indices Uint64x4) Uint64x4
 
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
-//
-// This operation is applied selectively under a write mask.
+// Permute2 performs a full permutation of vector x, y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is x appending y.
+// Only the needed bits to represent xy's index are used in indices' elements.
 //
-// Asm: VPERMQ, CPU Feature: AVX512
-func (x Int64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Int64x8
+// Asm: VPERMI2PD, CPU Feature: AVX512
+func (x Float64x8) Permute2(y Float64x8, indices Uint64x8) Float64x8
 
-// PermuteMasked performs a full permutation of vector x using indices:
-// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]}
-// Only the needed bits to represent x's index are used in indices' elements.
+// Permute2 performs a full permutation of vector x, y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is x appending y.
+// Only the needed bits to represent xy's index are used in indices' elements.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPERMI2Q, CPU Feature: AVX512
+func (x Int64x8) Permute2(y Int64x8, indices Uint64x8) Int64x8
+
+// Permute2 performs a full permutation of vector x, y using indices:
+// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]}
+// where xy is x appending y.
+// Only the needed bits to represent xy's index are used in indices' elements.
 //
-// Asm: VPERMQ, CPU Feature: AVX512
-func (x Uint64x8) PermuteMasked(indices Uint64x8, mask Mask64x8) Uint64x8
+// Asm: VPERMI2Q, CPU Feature: AVX512
+func (x Uint64x8) Permute2(y Uint64x8, indices Uint64x8) Uint64x8
 
 /* Reciprocal */
 
@@ -8094,50 +3846,6 @@ func (x Float64x4) Reciprocal() Float64x4
 // Asm: VRCP14PD, CPU Feature: AVX512
 func (x Float64x8) Reciprocal() Float64x8
 
-/* ReciprocalMasked */
-
-// ReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PS, CPU Feature: AVX512
-func (x Float32x4) ReciprocalMasked(mask Mask32x4) Float32x4
-
-// ReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PS, CPU Feature: AVX512
-func (x Float32x8) ReciprocalMasked(mask Mask32x8) Float32x8
-
-// ReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PS, CPU Feature: AVX512
-func (x Float32x16) ReciprocalMasked(mask Mask32x16) Float32x16
-
-// ReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PD, CPU Feature: AVX512
-func (x Float64x2) ReciprocalMasked(mask Mask64x2) Float64x2
-
-// ReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PD, CPU Feature: AVX512
-func (x Float64x4) ReciprocalMasked(mask Mask64x4) Float64x4
-
-// ReciprocalMasked computes an approximate reciprocal of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRCP14PD, CPU Feature: AVX512
-func (x Float64x8) ReciprocalMasked(mask Mask64x8) Float64x8
-
 /* ReciprocalSqrt */
 
 // ReciprocalSqrt computes an approximate reciprocal of the square root of each element.
@@ -8170,50 +3878,6 @@ func (x Float64x4) ReciprocalSqrt() Float64x4
 // Asm: VRSQRT14PD, CPU Feature: AVX512
 func (x Float64x8) ReciprocalSqrt() Float64x8
 
-/* ReciprocalSqrtMasked */
-
-// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PS, CPU Feature: AVX512
-func (x Float32x4) ReciprocalSqrtMasked(mask Mask32x4) Float32x4
-
-// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PS, CPU Feature: AVX512
-func (x Float32x8) ReciprocalSqrtMasked(mask Mask32x8) Float32x8
-
-// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PS, CPU Feature: AVX512
-func (x Float32x16) ReciprocalSqrtMasked(mask Mask32x16) Float32x16
-
-// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PD, CPU Feature: AVX512
-func (x Float64x2) ReciprocalSqrtMasked(mask Mask64x2) Float64x2
-
-// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PD, CPU Feature: AVX512
-func (x Float64x4) ReciprocalSqrtMasked(mask Mask64x4) Float64x4
-
-// ReciprocalSqrtMasked computes an approximate reciprocal of the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VRSQRT14PD, CPU Feature: AVX512
-func (x Float64x8) ReciprocalSqrtMasked(mask Mask64x8) Float64x8
-
 /* RotateAllLeft */
 
 // RotateAllLeft rotates each element to the left by the number of bits specified by the immediate.
@@ -8300,116 +3964,6 @@ func (x Uint64x4) RotateAllLeft(shift uint8) Uint64x4
 // Asm: VPROLQ, CPU Feature: AVX512
 func (x Uint64x8) RotateAllLeft(shift uint8) Uint64x8
 
-/* RotateAllLeftMasked */
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLD, CPU Feature: AVX512
-func (x Int32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Int32x4
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLD, CPU Feature: AVX512
-func (x Int32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Int32x8
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLD, CPU Feature: AVX512
-func (x Int32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Int32x16
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLQ, CPU Feature: AVX512
-func (x Int64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Int64x2
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLQ, CPU Feature: AVX512
-func (x Int64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Int64x4
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLQ, CPU Feature: AVX512
-func (x Int64x8) RotateAllLeftMasked(shift uint8, mask Mask64x8) Int64x8
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLD, CPU Feature: AVX512
-func (x Uint32x4) RotateAllLeftMasked(shift uint8, mask Mask32x4) Uint32x4
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLD, CPU Feature: AVX512
-func (x Uint32x8) RotateAllLeftMasked(shift uint8, mask Mask32x8) Uint32x8
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLD, CPU Feature: AVX512
-func (x Uint32x16) RotateAllLeftMasked(shift uint8, mask Mask32x16) Uint32x16
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLQ, CPU Feature: AVX512
-func (x Uint64x2) RotateAllLeftMasked(shift uint8, mask Mask64x2) Uint64x2
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLQ, CPU Feature: AVX512
-func (x Uint64x4) RotateAllLeftMasked(shift uint8, mask Mask64x4) Uint64x4
-
-// RotateAllLeftMasked rotates each element to the left by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPROLQ, CPU Feature: AVX512
-func (x Uint64x8) RotateAllLeftMasked(shift uint8, mask Mask64x8) Uint64x8
-
 /* RotateAllRight */
 
 // RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
@@ -8467,144 +4021,34 @@ func (x Uint32x4) RotateAllRight(shift uint8) Uint32x4
 //
 // Asm: VPRORD, CPU Feature: AVX512
 func (x Uint32x8) RotateAllRight(shift uint8) Uint32x8
-
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORD, CPU Feature: AVX512
-func (x Uint32x16) RotateAllRight(shift uint8) Uint32x16
-
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORQ, CPU Feature: AVX512
-func (x Uint64x2) RotateAllRight(shift uint8) Uint64x2
-
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORQ, CPU Feature: AVX512
-func (x Uint64x4) RotateAllRight(shift uint8) Uint64x4
-
-// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORQ, CPU Feature: AVX512
-func (x Uint64x8) RotateAllRight(shift uint8) Uint64x8
-
-/* RotateAllRightMasked */
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORD, CPU Feature: AVX512
-func (x Int32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Int32x4
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORD, CPU Feature: AVX512
-func (x Int32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Int32x8
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORD, CPU Feature: AVX512
-func (x Int32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Int32x16
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORQ, CPU Feature: AVX512
-func (x Int64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Int64x2
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORQ, CPU Feature: AVX512
-func (x Int64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Int64x4
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORQ, CPU Feature: AVX512
-func (x Int64x8) RotateAllRightMasked(shift uint8, mask Mask64x8) Int64x8
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORD, CPU Feature: AVX512
-func (x Uint32x4) RotateAllRightMasked(shift uint8, mask Mask32x4) Uint32x4
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPRORD, CPU Feature: AVX512
-func (x Uint32x8) RotateAllRightMasked(shift uint8, mask Mask32x8) Uint32x8
-
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
+
+// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
 // Asm: VPRORD, CPU Feature: AVX512
-func (x Uint32x16) RotateAllRightMasked(shift uint8, mask Mask32x16) Uint32x16
+func (x Uint32x16) RotateAllRight(shift uint8) Uint32x16
 
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
+// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
 // Asm: VPRORQ, CPU Feature: AVX512
-func (x Uint64x2) RotateAllRightMasked(shift uint8, mask Mask64x2) Uint64x2
+func (x Uint64x2) RotateAllRight(shift uint8) Uint64x2
 
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
+// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
 // Asm: VPRORQ, CPU Feature: AVX512
-func (x Uint64x4) RotateAllRightMasked(shift uint8, mask Mask64x4) Uint64x4
+func (x Uint64x4) RotateAllRight(shift uint8) Uint64x4
 
-// RotateAllRightMasked rotates each element to the right by the number of bits specified by the immediate.
-//
-// This operation is applied selectively under a write mask.
+// RotateAllRight rotates each element to the right by the number of bits specified by the immediate.
 //
 // shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
 // Asm: VPRORQ, CPU Feature: AVX512
-func (x Uint64x8) RotateAllRightMasked(shift uint8, mask Mask64x8) Uint64x8
+func (x Uint64x8) RotateAllRight(shift uint8) Uint64x8
 
 /* RotateLeft */
 
@@ -8668,92 +4112,6 @@ func (x Uint64x4) RotateLeft(y Uint64x4) Uint64x4
 // Asm: VPROLVQ, CPU Feature: AVX512
 func (x Uint64x8) RotateLeft(y Uint64x8) Uint64x8
 
-/* RotateLeftMasked */
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVD, CPU Feature: AVX512
-func (x Int32x4) RotateLeftMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVD, CPU Feature: AVX512
-func (x Int32x8) RotateLeftMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVD, CPU Feature: AVX512
-func (x Int32x16) RotateLeftMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVQ, CPU Feature: AVX512
-func (x Int64x2) RotateLeftMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVQ, CPU Feature: AVX512
-func (x Int64x4) RotateLeftMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVQ, CPU Feature: AVX512
-func (x Int64x8) RotateLeftMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVD, CPU Feature: AVX512
-func (x Uint32x4) RotateLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVD, CPU Feature: AVX512
-func (x Uint32x8) RotateLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVD, CPU Feature: AVX512
-func (x Uint32x16) RotateLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVQ, CPU Feature: AVX512
-func (x Uint64x2) RotateLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVQ, CPU Feature: AVX512
-func (x Uint64x4) RotateLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// RotateLeftMasked rotates each element in x to the left by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPROLVQ, CPU Feature: AVX512
-func (x Uint64x8) RotateLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
 /* RotateRight */
 
 // RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements.
@@ -8816,92 +4174,6 @@ func (x Uint64x4) RotateRight(y Uint64x4) Uint64x4
 // Asm: VPRORVQ, CPU Feature: AVX512
 func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8
 
-/* RotateRightMasked */
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVD, CPU Feature: AVX512
-func (x Int32x4) RotateRightMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVD, CPU Feature: AVX512
-func (x Int32x8) RotateRightMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVD, CPU Feature: AVX512
-func (x Int32x16) RotateRightMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVQ, CPU Feature: AVX512
-func (x Int64x2) RotateRightMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVQ, CPU Feature: AVX512
-func (x Int64x4) RotateRightMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVQ, CPU Feature: AVX512
-func (x Int64x8) RotateRightMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVD, CPU Feature: AVX512
-func (x Uint32x4) RotateRightMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVD, CPU Feature: AVX512
-func (x Uint32x8) RotateRightMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVD, CPU Feature: AVX512
-func (x Uint32x16) RotateRightMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVQ, CPU Feature: AVX512
-func (x Uint64x2) RotateRightMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVQ, CPU Feature: AVX512
-func (x Uint64x4) RotateRightMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// RotateRightMasked rotates each element in x to the right by the number of bits specified by y's corresponding elements.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPRORVQ, CPU Feature: AVX512
-func (x Uint64x8) RotateRightMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
 /* RoundToEven */
 
 // RoundToEven rounds elements to the nearest integer.
@@ -8968,62 +4240,6 @@ func (x Float64x4) RoundToEvenScaled(prec uint8) Float64x4
 // Asm: VRNDSCALEPD, CPU Feature: AVX512
 func (x Float64x8) RoundToEvenScaled(prec uint8) Float64x8
 
-/* RoundToEvenScaledMasked */
-
-// RoundToEvenScaledMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x4) RoundToEvenScaledMasked(prec uint8, mask Mask32x4) Float32x4
-
-// RoundToEvenScaledMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x8) RoundToEvenScaledMasked(prec uint8, mask Mask32x8) Float32x8
-
-// RoundToEvenScaledMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x16) RoundToEvenScaledMasked(prec uint8, mask Mask32x16) Float32x16
-
-// RoundToEvenScaledMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x2) RoundToEvenScaledMasked(prec uint8, mask Mask64x2) Float64x2
-
-// RoundToEvenScaledMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x4) RoundToEvenScaledMasked(prec uint8, mask Mask64x4) Float64x4
-
-// RoundToEvenScaledMasked rounds elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x8) RoundToEvenScaledMasked(prec uint8, mask Mask64x8) Float64x8
-
 /* RoundToEvenScaledResidue */
 
 // RoundToEvenScaledResidue computes the difference after rounding with specified precision.
@@ -9068,62 +4284,6 @@ func (x Float64x4) RoundToEvenScaledResidue(prec uint8) Float64x4
 // Asm: VREDUCEPD, CPU Feature: AVX512
 func (x Float64x8) RoundToEvenScaledResidue(prec uint8) Float64x8
 
-/* RoundToEvenScaledResidueMasked */
-
-// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x4) RoundToEvenScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4
-
-// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x8) RoundToEvenScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8
-
-// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x16) RoundToEvenScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16
-
-// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x2) RoundToEvenScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2
-
-// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x4) RoundToEvenScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4
-
-// RoundToEvenScaledResidueMasked computes the difference after rounding with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x8) RoundToEvenScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8
-
 /* Scale */
 
 // Scale multiplies elements by a power of 2.
@@ -9131,74 +4291,30 @@ func (x Float64x8) RoundToEvenScaledResidueMasked(prec uint8, mask Mask64x8) Flo
 // Asm: VSCALEFPS, CPU Feature: AVX512
 func (x Float32x4) Scale(y Float32x4) Float32x4
 
-// Scale multiplies elements by a power of 2.
-//
-// Asm: VSCALEFPS, CPU Feature: AVX512
-func (x Float32x8) Scale(y Float32x8) Float32x8
-
-// Scale multiplies elements by a power of 2.
-//
-// Asm: VSCALEFPS, CPU Feature: AVX512
-func (x Float32x16) Scale(y Float32x16) Float32x16
-
-// Scale multiplies elements by a power of 2.
-//
-// Asm: VSCALEFPD, CPU Feature: AVX512
-func (x Float64x2) Scale(y Float64x2) Float64x2
-
-// Scale multiplies elements by a power of 2.
-//
-// Asm: VSCALEFPD, CPU Feature: AVX512
-func (x Float64x4) Scale(y Float64x4) Float64x4
-
-// Scale multiplies elements by a power of 2.
-//
-// Asm: VSCALEFPD, CPU Feature: AVX512
-func (x Float64x8) Scale(y Float64x8) Float64x8
-
-/* ScaleMasked */
-
-// ScaleMasked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSCALEFPS, CPU Feature: AVX512
-func (x Float32x4) ScaleMasked(y Float32x4, mask Mask32x4) Float32x4
-
-// ScaleMasked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
+// Scale multiplies elements by a power of 2.
 //
 // Asm: VSCALEFPS, CPU Feature: AVX512
-func (x Float32x8) ScaleMasked(y Float32x8, mask Mask32x8) Float32x8
+func (x Float32x8) Scale(y Float32x8) Float32x8
 
-// ScaleMasked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
+// Scale multiplies elements by a power of 2.
 //
 // Asm: VSCALEFPS, CPU Feature: AVX512
-func (x Float32x16) ScaleMasked(y Float32x16, mask Mask32x16) Float32x16
+func (x Float32x16) Scale(y Float32x16) Float32x16
 
-// ScaleMasked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
+// Scale multiplies elements by a power of 2.
 //
 // Asm: VSCALEFPD, CPU Feature: AVX512
-func (x Float64x2) ScaleMasked(y Float64x2, mask Mask64x2) Float64x2
+func (x Float64x2) Scale(y Float64x2) Float64x2
 
-// ScaleMasked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
+// Scale multiplies elements by a power of 2.
 //
 // Asm: VSCALEFPD, CPU Feature: AVX512
-func (x Float64x4) ScaleMasked(y Float64x4, mask Mask64x4) Float64x4
+func (x Float64x4) Scale(y Float64x4) Float64x4
 
-// ScaleMasked multiplies elements by a power of 2.
-//
-// This operation is applied selectively under a write mask.
+// Scale multiplies elements by a power of 2.
 //
 // Asm: VSCALEFPD, CPU Feature: AVX512
-func (x Float64x8) ScaleMasked(y Float64x8, mask Mask64x8) Float64x8
+func (x Float64x8) Scale(y Float64x8) Float64x8
 
 /* SetElem */
 
@@ -9709,320 +4825,10 @@ func (x Uint64x4) ShiftAllLeftConcat(shift uint8, y Uint64x4) Uint64x4
 // ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the
 // immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
 //
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftAllLeftConcat(shift uint8, y Uint64x8) Uint64x8
-
-/* ShiftAllLeftConcatMasked */
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Int16x8) ShiftAllLeftConcatMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Int16x16) ShiftAllLeftConcatMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Int16x32) ShiftAllLeftConcatMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Int32x4) ShiftAllLeftConcatMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Int32x8) ShiftAllLeftConcatMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Int32x16) ShiftAllLeftConcatMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Int64x2) ShiftAllLeftConcatMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftAllLeftConcatMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftAllLeftConcatMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftAllLeftConcatMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftAllLeftConcatMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftAllLeftConcatMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftAllLeftConcatMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftAllLeftConcatMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftAllLeftConcatMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftAllLeftConcatMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftAllLeftConcatMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4
-
-// ShiftAllLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the upper bits of y to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftAllLeftConcatMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8
-
-/* ShiftAllLeftMasked */
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLW, CPU Feature: AVX512
-func (x Int16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Int16x8
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLW, CPU Feature: AVX512
-func (x Int16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Int16x16
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLW, CPU Feature: AVX512
-func (x Int16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Int16x32
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLD, CPU Feature: AVX512
-func (x Int32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Int32x4
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLD, CPU Feature: AVX512
-func (x Int32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Int32x8
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLD, CPU Feature: AVX512
-func (x Int32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Int32x16
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLQ, CPU Feature: AVX512
-func (x Int64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Int64x2
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLQ, CPU Feature: AVX512
-func (x Int64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Int64x4
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLQ, CPU Feature: AVX512
-func (x Int64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Int64x8
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLW, CPU Feature: AVX512
-func (x Uint16x8) ShiftAllLeftMasked(y uint64, mask Mask16x8) Uint16x8
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLW, CPU Feature: AVX512
-func (x Uint16x16) ShiftAllLeftMasked(y uint64, mask Mask16x16) Uint16x16
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLW, CPU Feature: AVX512
-func (x Uint16x32) ShiftAllLeftMasked(y uint64, mask Mask16x32) Uint16x32
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLD, CPU Feature: AVX512
-func (x Uint32x4) ShiftAllLeftMasked(y uint64, mask Mask32x4) Uint32x4
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLD, CPU Feature: AVX512
-func (x Uint32x8) ShiftAllLeftMasked(y uint64, mask Mask32x8) Uint32x8
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLD, CPU Feature: AVX512
-func (x Uint32x16) ShiftAllLeftMasked(y uint64, mask Mask32x16) Uint32x16
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLQ, CPU Feature: AVX512
-func (x Uint64x2) ShiftAllLeftMasked(y uint64, mask Mask64x2) Uint64x2
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLQ, CPU Feature: AVX512
-func (x Uint64x4) ShiftAllLeftMasked(y uint64, mask Mask64x4) Uint64x4
-
-// ShiftAllLeftMasked shifts each element to the left by the specified number of bits. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
+// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VPSLLQ, CPU Feature: AVX512
-func (x Uint64x8) ShiftAllLeftMasked(y uint64, mask Mask64x8) Uint64x8
+// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2
+func (x Uint64x8) ShiftAllLeftConcat(shift uint8, y Uint64x8) Uint64x8
 
 /* ShiftAllRight */
 
@@ -10217,360 +5023,50 @@ func (x Uint16x32) ShiftAllRightConcat(shift uint8, y Uint16x32) Uint16x32
 // ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
 // immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
 //
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftAllRightConcat(shift uint8, y Uint32x4) Uint32x4
-
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftAllRightConcat(shift uint8, y Uint32x8) Uint32x8
-
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftAllRightConcat(shift uint8, y Uint32x16) Uint32x16
-
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftAllRightConcat(shift uint8, y Uint64x2) Uint64x2
-
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftAllRightConcat(shift uint8, y Uint64x4) Uint64x4
-
-// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftAllRightConcat(shift uint8, y Uint64x8) Uint64x8
-
-/* ShiftAllRightConcatMasked */
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Int16x8) ShiftAllRightConcatMasked(shift uint8, y Int16x8, mask Mask16x8) Int16x8
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Int16x16) ShiftAllRightConcatMasked(shift uint8, y Int16x16, mask Mask16x16) Int16x16
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Int16x32) ShiftAllRightConcatMasked(shift uint8, y Int16x32, mask Mask16x32) Int16x32
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Int32x4) ShiftAllRightConcatMasked(shift uint8, y Int32x4, mask Mask32x4) Int32x4
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Int32x8) ShiftAllRightConcatMasked(shift uint8, y Int32x8, mask Mask32x8) Int32x8
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Int32x16) ShiftAllRightConcatMasked(shift uint8, y Int32x16, mask Mask32x16) Int32x16
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Int64x2) ShiftAllRightConcatMasked(shift uint8, y Int64x2, mask Mask64x2) Int64x2
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftAllRightConcatMasked(shift uint8, y Int64x4, mask Mask64x4) Int64x4
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftAllRightConcatMasked(shift uint8, y Int64x8, mask Mask64x8) Int64x8
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftAllRightConcatMasked(shift uint8, y Uint16x8, mask Mask16x8) Uint16x8
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftAllRightConcatMasked(shift uint8, y Uint16x16, mask Mask16x16) Uint16x16
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftAllRightConcatMasked(shift uint8, y Uint16x32, mask Mask16x32) Uint16x32
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftAllRightConcatMasked(shift uint8, y Uint32x4, mask Mask32x4) Uint32x4
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftAllRightConcatMasked(shift uint8, y Uint32x8, mask Mask32x8) Uint32x8
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftAllRightConcatMasked(shift uint8, y Uint32x16, mask Mask32x16) Uint32x16
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftAllRightConcatMasked(shift uint8, y Uint64x2, mask Mask64x2) Uint64x2
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftAllRightConcatMasked(shift uint8, y Uint64x4, mask Mask64x4) Uint64x4
-
-// ShiftAllRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftAllRightConcatMasked(shift uint8, y Uint64x8, mask Mask64x8) Uint64x8
-
-/* ShiftAllRightMasked */
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAW, CPU Feature: AVX512
-func (x Int16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Int16x8
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAW, CPU Feature: AVX512
-func (x Int16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Int16x16
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAW, CPU Feature: AVX512
-func (x Int16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Int16x32
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAD, CPU Feature: AVX512
-func (x Int32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Int32x4
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAD, CPU Feature: AVX512
-func (x Int32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Int32x8
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAD, CPU Feature: AVX512
-func (x Int32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Int32x16
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAQ, CPU Feature: AVX512
-func (x Int64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Int64x2
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAQ, CPU Feature: AVX512
-func (x Int64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Int64x4
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAQ, CPU Feature: AVX512
-func (x Int64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Int64x8
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRLW, CPU Feature: AVX512
-func (x Uint16x8) ShiftAllRightMasked(y uint64, mask Mask16x8) Uint16x8
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRLW, CPU Feature: AVX512
-func (x Uint16x16) ShiftAllRightMasked(y uint64, mask Mask16x16) Uint16x16
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRLW, CPU Feature: AVX512
-func (x Uint16x32) ShiftAllRightMasked(y uint64, mask Mask16x32) Uint16x32
-
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
+// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VPSRLD, CPU Feature: AVX512
-func (x Uint32x4) ShiftAllRightMasked(y uint64, mask Mask32x4) Uint32x4
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
+func (x Uint32x4) ShiftAllRightConcat(shift uint8, y Uint32x4) Uint32x4
 
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
+// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VPSRLD, CPU Feature: AVX512
-func (x Uint32x8) ShiftAllRightMasked(y uint64, mask Mask32x8) Uint32x8
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
+func (x Uint32x8) ShiftAllRightConcat(shift uint8, y Uint32x8) Uint32x8
 
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
+// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VPSRLD, CPU Feature: AVX512
-func (x Uint32x16) ShiftAllRightMasked(y uint64, mask Mask32x16) Uint32x16
+// Asm: VPSHRDD, CPU Feature: AVX512VBMI2
+func (x Uint32x16) ShiftAllRightConcat(shift uint8, y Uint32x16) Uint32x16
 
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
+// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VPSRLQ, CPU Feature: AVX512
-func (x Uint64x2) ShiftAllRightMasked(y uint64, mask Mask64x2) Uint64x2
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
+func (x Uint64x2) ShiftAllRightConcat(shift uint8, y Uint64x2) Uint64x2
 
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
+// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VPSRLQ, CPU Feature: AVX512
-func (x Uint64x4) ShiftAllRightMasked(y uint64, mask Mask64x4) Uint64x4
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
+func (x Uint64x4) ShiftAllRightConcat(shift uint8, y Uint64x4) Uint64x4
 
-// ShiftAllRightMasked shifts each element to the right by the specified number of bits. Emptied upper bits are zeroed.
+// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the
+// immediate(only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table.
 //
-// Asm: VPSRLQ, CPU Feature: AVX512
-func (x Uint64x8) ShiftAllRightMasked(y uint64, mask Mask64x8) Uint64x8
+// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2
+func (x Uint64x8) ShiftAllRightConcat(shift uint8, y Uint64x8) Uint64x8
 
 /* ShiftLeft */
 
@@ -10742,311 +5238,37 @@ func (x Uint16x32) ShiftLeftConcat(y Uint16x32, z Uint16x32) Uint16x32
 // corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
 // Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftLeftConcat(y Uint32x4, z Uint32x4) Uint32x4
-
-// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftLeftConcat(y Uint32x8, z Uint32x8) Uint32x8
-
-// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftLeftConcat(y Uint32x16, z Uint32x16) Uint32x16
-
-// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftLeftConcat(y Uint64x2, z Uint64x2) Uint64x2
-
-// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftLeftConcat(y Uint64x4, z Uint64x4) Uint64x4
-
-// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftLeftConcat(y Uint64x8, z Uint64x8) Uint64x8
-
-/* ShiftLeftConcatMasked */
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Int16x8) ShiftLeftConcatMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Int16x16) ShiftLeftConcatMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Int16x32) ShiftLeftConcatMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Int32x4) ShiftLeftConcatMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Int32x8) ShiftLeftConcatMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Int32x16) ShiftLeftConcatMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x2) ShiftLeftConcatMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftLeftConcatMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftLeftConcatMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftLeftConcatMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftLeftConcatMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftLeftConcatMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftLeftConcatMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftLeftConcatMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftLeftConcatMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftLeftConcatMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftLeftConcatMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4
-
-// ShiftLeftConcatMasked shifts each element of x to the left by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftLeftConcatMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8
-
-/* ShiftLeftMasked */
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVW, CPU Feature: AVX512
-func (x Int16x8) ShiftLeftMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVW, CPU Feature: AVX512
-func (x Int16x16) ShiftLeftMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVW, CPU Feature: AVX512
-func (x Int16x32) ShiftLeftMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVD, CPU Feature: AVX512
-func (x Int32x4) ShiftLeftMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVD, CPU Feature: AVX512
-func (x Int32x8) ShiftLeftMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVD, CPU Feature: AVX512
-func (x Int32x16) ShiftLeftMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVQ, CPU Feature: AVX512
-func (x Int64x2) ShiftLeftMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVQ, CPU Feature: AVX512
-func (x Int64x4) ShiftLeftMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVQ, CPU Feature: AVX512
-func (x Int64x8) ShiftLeftMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVW, CPU Feature: AVX512
-func (x Uint16x8) ShiftLeftMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVW, CPU Feature: AVX512
-func (x Uint16x16) ShiftLeftMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVW, CPU Feature: AVX512
-func (x Uint16x32) ShiftLeftMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVD, CPU Feature: AVX512
-func (x Uint32x4) ShiftLeftMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVD, CPU Feature: AVX512
-func (x Uint32x8) ShiftLeftMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSLLVD, CPU Feature: AVX512
-func (x Uint32x16) ShiftLeftMasked(y Uint32x16, mask Mask32x16) Uint32x16
+func (x Uint32x4) ShiftLeftConcat(y Uint32x4, z Uint32x4) Uint32x4
 
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
+// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSLLVQ, CPU Feature: AVX512
-func (x Uint64x2) ShiftLeftMasked(y Uint64x2, mask Mask64x2) Uint64x2
+// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
+func (x Uint32x8) ShiftLeftConcat(y Uint32x8, z Uint32x8) Uint32x8
 
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2
+func (x Uint32x16) ShiftLeftConcat(y Uint32x16, z Uint32x16) Uint32x16
+
+// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSLLVQ, CPU Feature: AVX512
-func (x Uint64x4) ShiftLeftMasked(y Uint64x4, mask Mask64x4) Uint64x4
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
+func (x Uint64x2) ShiftLeftConcat(y Uint64x2, z Uint64x2) Uint64x2
 
-// ShiftLeftMasked shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
+func (x Uint64x4) ShiftLeftConcat(y Uint64x4, z Uint64x4) Uint64x4
+
+// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
 //
-// Asm: VPSLLVQ, CPU Feature: AVX512
-func (x Uint64x8) ShiftLeftMasked(y Uint64x8, mask Mask64x8) Uint64x8
+// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2
+func (x Uint64x8) ShiftLeftConcat(y Uint64x8, z Uint64x8) Uint64x8
 
 /* ShiftRight */
 
@@ -11187,342 +5409,68 @@ func (x Int64x2) ShiftRightConcat(y Int64x2, z Int64x2) Int64x2
 // ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
 // corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftRightConcat(y Int64x4, z Int64x4) Int64x4
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftRightConcat(y Int64x8, z Int64x8) Int64x8
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftRightConcat(y Uint16x8, z Uint16x8) Uint16x8
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftRightConcat(y Uint16x16, z Uint16x16) Uint16x16
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftRightConcat(y Uint16x32, z Uint16x32) Uint16x32
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftRightConcat(y Uint32x4, z Uint32x4) Uint32x4
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftRightConcat(y Uint32x8, z Uint32x8) Uint32x8
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftRightConcat(y Uint32x16, z Uint32x16) Uint32x16
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftRightConcat(y Uint64x2, z Uint64x2) Uint64x2
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftRightConcat(y Uint64x4, z Uint64x4) Uint64x4
-
-// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftRightConcat(y Uint64x8, z Uint64x8) Uint64x8
-
-/* ShiftRightConcatMasked */
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Int16x8) ShiftRightConcatMasked(y Int16x8, z Int16x8, mask Mask16x8) Int16x8
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Int16x16) ShiftRightConcatMasked(y Int16x16, z Int16x16, mask Mask16x16) Int16x16
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Int16x32) ShiftRightConcatMasked(y Int16x32, z Int16x32, mask Mask16x32) Int16x32
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Int32x4) ShiftRightConcatMasked(y Int32x4, z Int32x4, mask Mask32x4) Int32x4
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Int32x8) ShiftRightConcatMasked(y Int32x8, z Int32x8, mask Mask32x8) Int32x8
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Int32x16) ShiftRightConcatMasked(y Int32x16, z Int32x16, mask Mask32x16) Int32x16
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x2) ShiftRightConcatMasked(y Int64x2, z Int64x2, mask Mask64x2) Int64x2
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x4) ShiftRightConcatMasked(y Int64x4, z Int64x4, mask Mask64x4) Int64x4
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Int64x8) ShiftRightConcatMasked(y Int64x8, z Int64x8, mask Mask64x8) Int64x8
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x8) ShiftRightConcatMasked(y Uint16x8, z Uint16x8, mask Mask16x8) Uint16x8
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x16) ShiftRightConcatMasked(y Uint16x16, z Uint16x16, mask Mask16x16) Uint16x16
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
-func (x Uint16x32) ShiftRightConcatMasked(y Uint16x32, z Uint16x32, mask Mask16x32) Uint16x32
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x4) ShiftRightConcatMasked(y Uint32x4, z Uint32x4, mask Mask32x4) Uint32x4
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x8) ShiftRightConcatMasked(y Uint32x8, z Uint32x8, mask Mask32x8) Uint32x8
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
-func (x Uint32x16) ShiftRightConcatMasked(y Uint32x16, z Uint32x16, mask Mask32x16) Uint32x16
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x2) ShiftRightConcatMasked(y Uint64x2, z Uint64x2, mask Mask64x2) Uint64x2
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x4) ShiftRightConcatMasked(y Uint64x4, z Uint64x4, mask Mask64x4) Uint64x4
-
-// ShiftRightConcatMasked shifts each element of x to the right by the number of bits specified by the
-// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
-func (x Uint64x8) ShiftRightConcatMasked(y Uint64x8, z Uint64x8, mask Mask64x8) Uint64x8
-
-/* ShiftRightMasked */
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512
-func (x Int16x8) ShiftRightMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512
-func (x Int16x16) ShiftRightMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVW, CPU Feature: AVX512
-func (x Int16x32) ShiftRightMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512
-func (x Int32x4) ShiftRightMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512
-func (x Int32x8) ShiftRightMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVD, CPU Feature: AVX512
-func (x Int32x16) ShiftRightMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512
-func (x Int64x2) ShiftRightMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512
-func (x Int64x4) ShiftRightMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRAVQ, CPU Feature: AVX512
-func (x Int64x8) ShiftRightMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRLVW, CPU Feature: AVX512
-func (x Uint16x8) ShiftRightMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSRLVW, CPU Feature: AVX512
-func (x Uint16x16) ShiftRightMasked(y Uint16x16, mask Mask16x16) Uint16x16
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
+func (x Int64x4) ShiftRightConcat(y Int64x4, z Int64x4) Int64x4
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSRLVW, CPU Feature: AVX512
-func (x Uint16x32) ShiftRightMasked(y Uint16x32, mask Mask16x32) Uint16x32
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
+func (x Int64x8) ShiftRightConcat(y Int64x8, z Int64x8) Int64x8
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512
-func (x Uint32x4) ShiftRightMasked(y Uint32x4, mask Mask32x4) Uint32x4
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
+func (x Uint16x8) ShiftRightConcat(y Uint16x8, z Uint16x8) Uint16x8
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512
-func (x Uint32x8) ShiftRightMasked(y Uint32x8, mask Mask32x8) Uint32x8
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
+func (x Uint16x16) ShiftRightConcat(y Uint16x16, z Uint16x16) Uint16x16
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
-//
-// This operation is applied selectively under a write mask.
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSRLVD, CPU Feature: AVX512
-func (x Uint32x16) ShiftRightMasked(y Uint32x16, mask Mask32x16) Uint32x16
+// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2
+func (x Uint16x32) ShiftRightConcat(y Uint16x32, z Uint16x32) Uint16x32
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
+func (x Uint32x4) ShiftRightConcat(y Uint32x4, z Uint32x4) Uint32x4
+
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512
-func (x Uint64x2) ShiftRightMasked(y Uint64x2, mask Mask64x2) Uint64x2
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
+func (x Uint32x8) ShiftRightConcat(y Uint32x8, z Uint32x8) Uint32x8
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2
+func (x Uint32x16) ShiftRightConcat(y Uint32x16, z Uint32x16) Uint32x16
+
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512
-func (x Uint64x4) ShiftRightMasked(y Uint64x4, mask Mask64x4) Uint64x4
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
+func (x Uint64x2) ShiftRightConcat(y Uint64x2, z Uint64x2) Uint64x2
 
-// ShiftRightMasked shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed.
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
+func (x Uint64x4) ShiftRightConcat(y Uint64x4, z Uint64x4) Uint64x4
+
+// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the
+// corresponding elements in y(only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
 //
-// Asm: VPSRLVQ, CPU Feature: AVX512
-func (x Uint64x8) ShiftRightMasked(y Uint64x8, mask Mask64x8) Uint64x8
+// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2
+func (x Uint64x8) ShiftRightConcat(y Uint64x8, z Uint64x8) Uint64x8
 
 /* Sqrt */
 
@@ -11556,50 +5504,6 @@ func (x Float64x4) Sqrt() Float64x4
 // Asm: VSQRTPD, CPU Feature: AVX512
 func (x Float64x8) Sqrt() Float64x8
 
-/* SqrtMasked */
-
-// SqrtMasked computes the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSQRTPS, CPU Feature: AVX512
-func (x Float32x4) SqrtMasked(mask Mask32x4) Float32x4
-
-// SqrtMasked computes the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSQRTPS, CPU Feature: AVX512
-func (x Float32x8) SqrtMasked(mask Mask32x8) Float32x8
-
-// SqrtMasked computes the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSQRTPS, CPU Feature: AVX512
-func (x Float32x16) SqrtMasked(mask Mask32x16) Float32x16
-
-// SqrtMasked computes the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSQRTPD, CPU Feature: AVX512
-func (x Float64x2) SqrtMasked(mask Mask64x2) Float64x2
-
-// SqrtMasked computes the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSQRTPD, CPU Feature: AVX512
-func (x Float64x4) SqrtMasked(mask Mask64x4) Float64x4
-
-// SqrtMasked computes the square root of each element.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSQRTPD, CPU Feature: AVX512
-func (x Float64x8) SqrtMasked(mask Mask64x8) Float64x8
-
 /* Sub */
 
 // Sub subtracts corresponding elements of two vectors.
@@ -11702,267 +5606,55 @@ func (x Uint8x16) Sub(y Uint8x16) Uint8x16
 // Asm: VPSUBB, CPU Feature: AVX2
 func (x Uint8x32) Sub(y Uint8x32) Uint8x32
 
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBB, CPU Feature: AVX512
-func (x Uint8x64) Sub(y Uint8x64) Uint8x64
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBW, CPU Feature: AVX
-func (x Uint16x8) Sub(y Uint16x8) Uint16x8
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBW, CPU Feature: AVX2
-func (x Uint16x16) Sub(y Uint16x16) Uint16x16
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBW, CPU Feature: AVX512
-func (x Uint16x32) Sub(y Uint16x32) Uint16x32
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBD, CPU Feature: AVX
-func (x Uint32x4) Sub(y Uint32x4) Uint32x4
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBD, CPU Feature: AVX2
-func (x Uint32x8) Sub(y Uint32x8) Uint32x8
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBD, CPU Feature: AVX512
-func (x Uint32x16) Sub(y Uint32x16) Uint32x16
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBQ, CPU Feature: AVX
-func (x Uint64x2) Sub(y Uint64x2) Uint64x2
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBQ, CPU Feature: AVX2
-func (x Uint64x4) Sub(y Uint64x4) Uint64x4
-
-// Sub subtracts corresponding elements of two vectors.
-//
-// Asm: VPSUBQ, CPU Feature: AVX512
-func (x Uint64x8) Sub(y Uint64x8) Uint64x8
-
-/* SubMasked */
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSUBPS, CPU Feature: AVX512
-func (x Float32x4) SubMasked(y Float32x4, mask Mask32x4) Float32x4
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSUBPS, CPU Feature: AVX512
-func (x Float32x8) SubMasked(y Float32x8, mask Mask32x8) Float32x8
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSUBPS, CPU Feature: AVX512
-func (x Float32x16) SubMasked(y Float32x16, mask Mask32x16) Float32x16
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSUBPD, CPU Feature: AVX512
-func (x Float64x2) SubMasked(y Float64x2, mask Mask64x2) Float64x2
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSUBPD, CPU Feature: AVX512
-func (x Float64x4) SubMasked(y Float64x4, mask Mask64x4) Float64x4
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VSUBPD, CPU Feature: AVX512
-func (x Float64x8) SubMasked(y Float64x8, mask Mask64x8) Float64x8
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBB, CPU Feature: AVX512
-func (x Int8x16) SubMasked(y Int8x16, mask Mask8x16) Int8x16
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBB, CPU Feature: AVX512
-func (x Int8x32) SubMasked(y Int8x32, mask Mask8x32) Int8x32
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBB, CPU Feature: AVX512
-func (x Int8x64) SubMasked(y Int8x64, mask Mask8x64) Int8x64
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBW, CPU Feature: AVX512
-func (x Int16x8) SubMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBW, CPU Feature: AVX512
-func (x Int16x16) SubMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBW, CPU Feature: AVX512
-func (x Int16x32) SubMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBD, CPU Feature: AVX512
-func (x Int32x4) SubMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBD, CPU Feature: AVX512
-func (x Int32x8) SubMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBD, CPU Feature: AVX512
-func (x Int32x16) SubMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBQ, CPU Feature: AVX512
-func (x Int64x2) SubMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBQ, CPU Feature: AVX512
-func (x Int64x4) SubMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBQ, CPU Feature: AVX512
-func (x Int64x8) SubMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBB, CPU Feature: AVX512
-func (x Uint8x16) SubMasked(y Uint8x16, mask Mask8x16) Uint8x16
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBB, CPU Feature: AVX512
-func (x Uint8x32) SubMasked(y Uint8x32, mask Mask8x32) Uint8x32
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBB, CPU Feature: AVX512
-func (x Uint8x64) SubMasked(y Uint8x64, mask Mask8x64) Uint8x64
-
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBW, CPU Feature: AVX512
-func (x Uint16x8) SubMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// SubMasked subtracts corresponding elements of two vectors.
+// Sub subtracts corresponding elements of two vectors.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPSUBB, CPU Feature: AVX512
+func (x Uint8x64) Sub(y Uint8x64) Uint8x64
+
+// Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBW, CPU Feature: AVX512
-func (x Uint16x16) SubMasked(y Uint16x16, mask Mask16x16) Uint16x16
+// Asm: VPSUBW, CPU Feature: AVX
+func (x Uint16x8) Sub(y Uint16x8) Uint16x8
 
-// SubMasked subtracts corresponding elements of two vectors.
+// Sub subtracts corresponding elements of two vectors.
 //
-// This operation is applied selectively under a write mask.
+// Asm: VPSUBW, CPU Feature: AVX2
+func (x Uint16x16) Sub(y Uint16x16) Uint16x16
+
+// Sub subtracts corresponding elements of two vectors.
 //
 // Asm: VPSUBW, CPU Feature: AVX512
-func (x Uint16x32) SubMasked(y Uint16x32, mask Mask16x32) Uint16x32
+func (x Uint16x32) Sub(y Uint16x32) Uint16x32
 
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBD, CPU Feature: AVX512
-func (x Uint32x4) SubMasked(y Uint32x4, mask Mask32x4) Uint32x4
+// Asm: VPSUBD, CPU Feature: AVX
+func (x Uint32x4) Sub(y Uint32x4) Uint32x4
 
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBD, CPU Feature: AVX512
-func (x Uint32x8) SubMasked(y Uint32x8, mask Mask32x8) Uint32x8
+// Asm: VPSUBD, CPU Feature: AVX2
+func (x Uint32x8) Sub(y Uint32x8) Uint32x8
 
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// Sub subtracts corresponding elements of two vectors.
 //
 // Asm: VPSUBD, CPU Feature: AVX512
-func (x Uint32x16) SubMasked(y Uint32x16, mask Mask32x16) Uint32x16
+func (x Uint32x16) Sub(y Uint32x16) Uint32x16
 
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBQ, CPU Feature: AVX512
-func (x Uint64x2) SubMasked(y Uint64x2, mask Mask64x2) Uint64x2
+// Asm: VPSUBQ, CPU Feature: AVX
+func (x Uint64x2) Sub(y Uint64x2) Uint64x2
 
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// Sub subtracts corresponding elements of two vectors.
 //
-// Asm: VPSUBQ, CPU Feature: AVX512
-func (x Uint64x4) SubMasked(y Uint64x4, mask Mask64x4) Uint64x4
+// Asm: VPSUBQ, CPU Feature: AVX2
+func (x Uint64x4) Sub(y Uint64x4) Uint64x4
 
-// SubMasked subtracts corresponding elements of two vectors.
-//
-// This operation is applied selectively under a write mask.
+// Sub subtracts corresponding elements of two vectors.
 //
 // Asm: VPSUBQ, CPU Feature: AVX512
-func (x Uint64x8) SubMasked(y Uint64x8, mask Mask64x8) Uint64x8
+func (x Uint64x8) Sub(y Uint64x8) Uint64x8
 
 /* SubPairs */
 
@@ -12114,92 +5806,6 @@ func (x Uint16x16) SubSaturated(y Uint16x16) Uint16x16
 // Asm: VPSUBUSW, CPU Feature: AVX512
 func (x Uint16x32) SubSaturated(y Uint16x32) Uint16x32
 
-/* SubSaturatedMasked */
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBSB, CPU Feature: AVX512
-func (x Int8x16) SubSaturatedMasked(y Int8x16, mask Mask8x16) Int8x16
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBSB, CPU Feature: AVX512
-func (x Int8x32) SubSaturatedMasked(y Int8x32, mask Mask8x32) Int8x32
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBSB, CPU Feature: AVX512
-func (x Int8x64) SubSaturatedMasked(y Int8x64, mask Mask8x64) Int8x64
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBSW, CPU Feature: AVX512
-func (x Int16x8) SubSaturatedMasked(y Int16x8, mask Mask16x8) Int16x8
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBSW, CPU Feature: AVX512
-func (x Int16x16) SubSaturatedMasked(y Int16x16, mask Mask16x16) Int16x16
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBSW, CPU Feature: AVX512
-func (x Int16x32) SubSaturatedMasked(y Int16x32, mask Mask16x32) Int16x32
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBUSB, CPU Feature: AVX512
-func (x Uint8x16) SubSaturatedMasked(y Uint8x16, mask Mask8x16) Uint8x16
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBUSB, CPU Feature: AVX512
-func (x Uint8x32) SubSaturatedMasked(y Uint8x32, mask Mask8x32) Uint8x32
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBUSB, CPU Feature: AVX512
-func (x Uint8x64) SubSaturatedMasked(y Uint8x64, mask Mask8x64) Uint8x64
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBUSW, CPU Feature: AVX512
-func (x Uint16x8) SubSaturatedMasked(y Uint16x8, mask Mask16x8) Uint16x8
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBUSW, CPU Feature: AVX512
-func (x Uint16x16) SubSaturatedMasked(y Uint16x16, mask Mask16x16) Uint16x16
-
-// SubSaturatedMasked subtracts corresponding elements of two vectors with saturation.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPSUBUSW, CPU Feature: AVX512
-func (x Uint16x32) SubSaturatedMasked(y Uint16x32, mask Mask16x32) Uint16x32
-
 /* Trunc */
 
 // Trunc truncates elements towards zero.
@@ -12266,62 +5872,6 @@ func (x Float64x4) TruncScaled(prec uint8) Float64x4
 // Asm: VRNDSCALEPD, CPU Feature: AVX512
 func (x Float64x8) TruncScaled(prec uint8) Float64x8
 
-/* TruncScaledMasked */
-
-// TruncScaledMasked truncates elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x4) TruncScaledMasked(prec uint8, mask Mask32x4) Float32x4
-
-// TruncScaledMasked truncates elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x8) TruncScaledMasked(prec uint8, mask Mask32x8) Float32x8
-
-// TruncScaledMasked truncates elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPS, CPU Feature: AVX512
-func (x Float32x16) TruncScaledMasked(prec uint8, mask Mask32x16) Float32x16
-
-// TruncScaledMasked truncates elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x2) TruncScaledMasked(prec uint8, mask Mask64x2) Float64x2
-
-// TruncScaledMasked truncates elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x4) TruncScaledMasked(prec uint8, mask Mask64x4) Float64x4
-
-// TruncScaledMasked truncates elements with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VRNDSCALEPD, CPU Feature: AVX512
-func (x Float64x8) TruncScaledMasked(prec uint8, mask Mask64x8) Float64x8
-
 /* TruncScaledResidue */
 
 // TruncScaledResidue computes the difference after truncating with specified precision.
@@ -12366,62 +5916,6 @@ func (x Float64x4) TruncScaledResidue(prec uint8) Float64x4
 // Asm: VREDUCEPD, CPU Feature: AVX512
 func (x Float64x8) TruncScaledResidue(prec uint8) Float64x8
 
-/* TruncScaledResidueMasked */
-
-// TruncScaledResidueMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x4) TruncScaledResidueMasked(prec uint8, mask Mask32x4) Float32x4
-
-// TruncScaledResidueMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x8) TruncScaledResidueMasked(prec uint8, mask Mask32x8) Float32x8
-
-// TruncScaledResidueMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPS, CPU Feature: AVX512
-func (x Float32x16) TruncScaledResidueMasked(prec uint8, mask Mask32x16) Float32x16
-
-// TruncScaledResidueMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x2) TruncScaledResidueMasked(prec uint8, mask Mask64x2) Float64x2
-
-// TruncScaledResidueMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x4) TruncScaledResidueMasked(prec uint8, mask Mask64x4) Float64x4
-
-// TruncScaledResidueMasked computes the difference after truncating with specified precision.
-//
-// This operation is applied selectively under a write mask.
-//
-// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table.
-//
-// Asm: VREDUCEPD, CPU Feature: AVX512
-func (x Float64x8) TruncScaledResidueMasked(prec uint8, mask Mask64x8) Float64x8
-
 /* Xor */
 
 // Xor performs a bitwise XOR operation between two vectors.
@@ -12544,92 +6038,6 @@ func (x Uint64x4) Xor(y Uint64x4) Uint64x4
 // Asm: VPXORQ, CPU Feature: AVX512
 func (x Uint64x8) Xor(y Uint64x8) Uint64x8
 
-/* XorMasked */
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORD, CPU Feature: AVX512
-func (x Int32x4) XorMasked(y Int32x4, mask Mask32x4) Int32x4
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORD, CPU Feature: AVX512
-func (x Int32x8) XorMasked(y Int32x8, mask Mask32x8) Int32x8
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORD, CPU Feature: AVX512
-func (x Int32x16) XorMasked(y Int32x16, mask Mask32x16) Int32x16
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORQ, CPU Feature: AVX512
-func (x Int64x2) XorMasked(y Int64x2, mask Mask64x2) Int64x2
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORQ, CPU Feature: AVX512
-func (x Int64x4) XorMasked(y Int64x4, mask Mask64x4) Int64x4
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORQ, CPU Feature: AVX512
-func (x Int64x8) XorMasked(y Int64x8, mask Mask64x8) Int64x8
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORD, CPU Feature: AVX512
-func (x Uint32x4) XorMasked(y Uint32x4, mask Mask32x4) Uint32x4
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORD, CPU Feature: AVX512
-func (x Uint32x8) XorMasked(y Uint32x8, mask Mask32x8) Uint32x8
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORD, CPU Feature: AVX512
-func (x Uint32x16) XorMasked(y Uint32x16, mask Mask32x16) Uint32x16
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORQ, CPU Feature: AVX512
-func (x Uint64x2) XorMasked(y Uint64x2, mask Mask64x2) Uint64x2
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORQ, CPU Feature: AVX512
-func (x Uint64x4) XorMasked(y Uint64x4, mask Mask64x4) Uint64x4
-
-// XorMasked performs a bitwise XOR operation between two vectors.
-//
-// This operation is applied selectively under a write mask.
-//
-// Asm: VPXORQ, CPU Feature: AVX512
-func (x Uint64x8) XorMasked(y Uint64x8, mask Mask64x8) Uint64x8
-
 /* blend */
 
 // blend blends two vectors based on mask values, choosing either
index 3faeeaccfdefabdb3c18ef701c4dd59150302ff1..c88fe4b9fef58e6ffc42d42e833bdd1d8be47d7b 100644 (file)
@@ -43,7 +43,7 @@ func TestType(t *testing.T) {
                return
        }
        v.z = maskT(simd.Mask32x4FromBits(0b0011))
-       *v.y = v.y.AddMasked(v.x, simd.Mask32x4(v.z))
+       *v.y = v.y.Add(v.x).Masked(simd.Mask32x4(v.z))
 
        got := [4]int32{}
        v.y.Store(&got)
@@ -121,7 +121,7 @@ func TestMaskConversion(t *testing.T) {
        }
        x := simd.LoadInt32x4Slice([]int32{5, 0, 7, 0})
        mask := simd.Int32x4{}.Sub(x).ToMask()
-       y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).AddMasked(x, mask)
+       y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).Add(x).Masked(mask)
        want := [4]int32{6, 0, 10, 0}
        got := make([]int32, 4)
        y.StoreSlice(got)
@@ -327,7 +327,7 @@ func TestBitMaskLoad(t *testing.T) {
        results := [2]int64{}
        want := [2]int64{0, 6}
        m := simd.LoadMask64x2FromBits(&bits)
-       simd.LoadInt64x2Slice([]int64{1, 2}).AddMasked(simd.LoadInt64x2Slice([]int64{3, 4}), m).Store(&results)
+       simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
        for i := range 2 {
                if results[i] != want[i] {
                        t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
@@ -359,7 +359,7 @@ func TestBitMaskFromBits(t *testing.T) {
        results := [2]int64{}
        want := [2]int64{0, 6}
        m := simd.Mask64x2FromBits(0b10)
-       simd.LoadInt64x2Slice([]int64{1, 2}).AddMasked(simd.LoadInt64x2Slice([]int64{3, 4}), m).Store(&results)
+       simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
        for i := range 2 {
                if results[i] != want[i] {
                        t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])